github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/drivers/docker/driver.go

package docker

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	docker "github.com/fsouza/go-dockerclient"
	"github.com/hashicorp/consul-template/signals"
	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	plugin "github.com/hashicorp/go-plugin"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/drivers/docker/docklog"
	"github.com/hashicorp/nomad/drivers/shared/eventer"
	"github.com/hashicorp/nomad/drivers/shared/resolvconf"
	nstructs "github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/base"
	"github.com/hashicorp/nomad/plugins/drivers"
	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
)

var (
	// createClientsLock is a lock that protects reading/writing global client
	// variables
	createClientsLock sync.Mutex

	// client is a docker client with a timeout of 5 minutes. It is used for
	// all operations with the docker daemon that are not long running, such
	// as creating and killing containers.
	client *docker.Client

	// waitClient is a docker client with no timeouts. It is used for long
	// running operations such as waiting on containers and collecting stats.
	waitClient *docker.Client

	dockerTransientErrs = []string{
		"Client.Timeout exceeded while awaiting headers",
		"EOF",
		"API error (500)",
	}

	// recoverableErrTimeouts returns a recoverable error if the error was due
	// to timeouts
	recoverableErrTimeouts = func(err error) error {
		r := false
		if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") ||
			strings.Contains(err.Error(), "EOF") {
			r = true
		}
		return nstructs.NewRecoverableError(err, r)
	}

	// taskHandleVersion is the version of task handle which this driver sets
	// and understands how to decode driver state
	taskHandleVersion = 1

	// Nvidia-container-runtime environment variable names
	nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
)

const (
	dockerLabelAllocID = "com.hashicorp.nomad.alloc_id"
)

type Driver struct {
	// eventer is used to handle multiplexing of TaskEvents calls such that an
	// event can be broadcast to all callers
	eventer *eventer.Eventer

	// config contains the runtime configuration for the driver set by the
	// SetConfig RPC
	config *DriverConfig

	// clientConfig contains a driver specific subset of the Nomad client
	// configuration
	clientConfig *base.ClientDriverConfig

	// ctx is the context for the driver. It is passed to other subsystems to
	// coordinate shutdown.
	ctx context.Context

	// tasks is the in memory datastore mapping taskIDs to taskHandles
	tasks *taskStore

	// coordinator is what tracks multiple image pulls against the same docker image
	coordinator *dockerCoordinator

	// logger will log to the Nomad agent
	logger hclog.Logger

	// gpuRuntime indicates nvidia-docker runtime availability
	gpuRuntime bool

	// A tri-state boolean to know if the fingerprinting has happened and
	// whether it has been successful
	fingerprintSuccess *bool
	fingerprintLock    sync.RWMutex

	// A boolean to know if the docker driver has ever been correctly detected
	// for use during fingerprinting.
	detected     bool
	detectedLock sync.RWMutex

	reconciler *containerReconciler
}

// NewDockerDriver returns a docker implementation of a driver plugin
func NewDockerDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin {
	logger = logger.Named(pluginName)
	return &Driver{
		eventer: eventer.NewEventer(ctx, logger),
		config:  &DriverConfig{},
		tasks:   newTaskStore(),
		ctx:     ctx,
		logger:  logger,
	}
}

func (d *Driver) reattachToDockerLogger(reattachConfig *pstructs.ReattachConfig) (docklog.DockerLogger, *plugin.Client, error) {
	reattach, err := pstructs.ReattachConfigToGoPlugin(reattachConfig)
	if err != nil {
		return nil, nil, err
	}

	dlogger, dloggerPluginClient, err := docklog.ReattachDockerLogger(reattach)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to reattach to docker logger process: %v", err)
	}

	return dlogger, dloggerPluginClient, nil
}

func (d *Driver) setupNewDockerLogger(container *docker.Container, cfg *drivers.TaskConfig, startTime time.Time) (docklog.DockerLogger, *plugin.Client, error) {
	dlogger, pluginClient, err := docklog.LaunchDockerLogger(d.logger)
	if err != nil {
		if pluginClient != nil {
			pluginClient.Kill()
		}
		return nil, nil, fmt.Errorf("failed to launch docker logger plugin: %v", err)
	}

	if err := dlogger.Start(&docklog.StartOpts{
		Endpoint:    d.config.Endpoint,
		ContainerID: container.ID,
		TTY:         container.Config.Tty,
		Stdout:      cfg.StdoutPath,
		Stderr:      cfg.StderrPath,
		TLSCert:     d.config.TLS.Cert,
		TLSKey:      d.config.TLS.Key,
		TLSCA:       d.config.TLS.CA,
		StartTime:   startTime.Unix(),
	}); err != nil {
		pluginClient.Kill()
		return nil, nil, fmt.Errorf("failed to launch docker logger process %s: %v", container.ID, err)
	}

	return dlogger, pluginClient, nil
}

func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
	if _, ok := d.tasks.Get(handle.Config.ID); ok {
		return nil
	}

	// COMPAT(0.10): pre 0.9 upgrade path check
	if handle.Version == 0 {
		return d.recoverPre09Task(handle)
	}

	var handleState taskHandleState
	if err := handle.GetDriverState(&handleState); err != nil {
		return fmt.Errorf("failed to decode driver task state: %v", err)
	}

	client, _, err := d.dockerClients()
	if err != nil {
		return fmt.Errorf("failed to get docker client: %v", err)
	}

	container, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{
		ID: handleState.ContainerID,
	})
	if err != nil {
		return fmt.Errorf("failed to inspect container for id %q: %v", handleState.ContainerID, err)
	}

	h := &taskHandle{
		client: client,
		waitClient:            waitClient,
		logger:                d.logger.With("container_id", container.ID),
		task:                  handle.Config,
		containerID:           container.ID,
		containerImage:        container.Image,
		doneCh:                make(chan bool),
		waitCh:                make(chan struct{}),
		removeContainerOnExit: d.config.GC.Container,
		net:                   handleState.DriverNetwork,
	}

	if !d.config.DisableLogCollection {
		h.dlogger, h.dloggerPluginClient, err = d.reattachToDockerLogger(handleState.ReattachConfig)
		if err != nil {
			d.logger.Warn("failed to reattach to docker logger process", "error", err)

			h.dlogger, h.dloggerPluginClient, err = d.setupNewDockerLogger(container, handle.Config, time.Now())
			if err != nil {
				if err := client.StopContainer(handleState.ContainerID, 0); err != nil {
					d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err)
				}
				return fmt.Errorf("failed to setup replacement docker logger: %v", err)
			}

			if err := handle.SetDriverState(h.buildState()); err != nil {
				if err := client.StopContainer(handleState.ContainerID, 0); err != nil {
					d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err)
				}
				return fmt.Errorf("failed to store driver state: %v", err)
			}
		}
	}

	d.tasks.Set(handle.Config.ID, h)
	go h.run()

	return nil
}

func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
	if _, ok := d.tasks.Get(cfg.ID); ok {
		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
	}

	var driverConfig TaskConfig

	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
	}

	if driverConfig.Image == "" {
		return nil, nil, fmt.Errorf("image name required for docker driver")
	}

	driverConfig.Image = strings.TrimPrefix(driverConfig.Image, "https://")

	handle := drivers.NewTaskHandle(taskHandleVersion)
	handle.Config = cfg

	// Initialize docker API clients
	client, _, err := d.dockerClients()
	if err != nil {
		return nil, nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
	}

	id, err := d.createImage(cfg, &driverConfig, client)
	if err != nil {
		return nil, nil, err
	}

	if runtime.GOOS == "windows" {
		err = d.convertAllocPathsForWindowsLCOW(cfg, driverConfig.Image)
		if err != nil {
			return nil, nil, err
		}
	}

	containerCfg, err := d.createContainerConfig(cfg, &driverConfig, driverConfig.Image)
	if err != nil {
		d.logger.Error("failed to create container configuration", "image_name", driverConfig.Image,
			"image_id", id, "error", err)
		return nil, nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", driverConfig.Image, id, err)
	}

	startAttempts := 0
CREATE:
	container, err := d.createContainer(client, containerCfg, driverConfig.Image)
	if err != nil {
		d.logger.Error("failed to create container", "error", err)
		client.RemoveContainer(docker.RemoveContainerOptions{
			ID:    containerCfg.Name,
			Force: true,
		})
		return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to create container: %v", err), err)
	}

	d.logger.Info("created container", "container_id", container.ID)

	// We don't need to start the container if the container is already running
	// since we don't create containers which are already present on the host
	// and are running.
	if !container.State.Running {
		// Start the container
		if err := d.startContainer(container); err != nil {
			d.logger.Error("failed to start container", "container_id", container.ID, "error", err)
			client.RemoveContainer(docker.RemoveContainerOptions{
				ID:    container.ID,
				Force: true,
			})
			// Some sort of docker race bug, recreating the container usually works
			if strings.Contains(err.Error(), "OCI runtime create failed: container with id exists:") && startAttempts < 5 {
				startAttempts++
				d.logger.Debug("reattempting container create/start sequence", "attempt", startAttempts, "container_id", id)
				goto CREATE
			}
			return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("Failed to start container %s: %s", container.ID, err), err)
		}

		// Inspect container to get all of the container metadata as much of the
		// metadata (eg networking) isn't populated until the container is started
		runningContainer, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{
			ID: container.ID,
		})
		if err != nil {
			msg := "failed to inspect started container"
			d.logger.Error(msg, "error", err)
			client.RemoveContainer(docker.RemoveContainerOptions{
				ID:    container.ID,
				Force: true,
			})
			return nil, nil, nstructs.NewRecoverableError(fmt.Errorf("%s %s: %s", msg, container.ID, err), true)
		}
		container = runningContainer
		d.logger.Info("started container", "container_id", container.ID)
	} else {
		d.logger.Debug("re-attaching to container", "container_id",
			container.ID, "container_state", container.State.String())
	}

	collectingLogs := !d.config.DisableLogCollection

	var dlogger docklog.DockerLogger
	var pluginClient *plugin.Client

	if collectingLogs {
		dlogger, pluginClient, err = d.setupNewDockerLogger(container, cfg, time.Unix(0, 0))
		if err != nil {
			d.logger.Error("an error occurred after container startup, terminating container", "container_id", container.ID)
			client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true})
			return nil, nil, err
		}
	}

	// Detect container address
	ip, autoUse := d.detectIP(container, &driverConfig)

	net := &drivers.DriverNetwork{
		PortMap:       driverConfig.PortMap,
		IP:            ip,
		AutoAdvertise: autoUse,
	}

	// Return a driver handle
	h := &taskHandle{
		client:                client,
		waitClient:            waitClient,
		dlogger:               dlogger,
		dloggerPluginClient:   pluginClient,
		logger:                d.logger.With("container_id", container.ID),
		task:                  cfg,
		containerID:           container.ID,
		containerImage:        container.Image,
		doneCh:                make(chan bool),
		waitCh:                make(chan struct{}),
		removeContainerOnExit: d.config.GC.Container,
		net:                   net,
	}

	if err := handle.SetDriverState(h.buildState()); err != nil {
		d.logger.Error("error encoding container occurred after startup, terminating container", "container_id", container.ID, "error", err)
		if collectingLogs {
			dlogger.Stop()
			pluginClient.Kill()
		}
		client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true})
		return nil, nil, err
	}

	d.tasks.Set(cfg.ID, h)
	go h.run()

	return handle, net, nil
}

// createContainerClient is the subset of Docker Client methods used by the
// createContainer method to ease testing subtle error conditions.
type createContainerClient interface {
	CreateContainer(docker.CreateContainerOptions) (*docker.Container, error)
	InspectContainer(id string) (*docker.Container, error)
	ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error)
	RemoveContainer(opts docker.RemoveContainerOptions) error
}

// createContainer creates the container given the passed configuration. It
// attempts to handle any transient Docker errors.
func (d *Driver) createContainer(client createContainerClient, config docker.CreateContainerOptions,
	image string) (*docker.Container, error) {
	// Create a container
	attempted := 0
CREATE:
	container, createErr := client.CreateContainer(config)
	if createErr == nil {
		return container, nil
	}

	d.logger.Debug("failed to create container", "container_name",
		config.Name, "image_name", image, "image_id", config.Config.Image,
		"attempt", attempted+1, "error", createErr)

	// Volume management tools like Portworx may not have detached a volume
	// from a previous node before Nomad started a replacement task. Treat
	// these errors as recoverable so we retry.
	if strings.Contains(strings.ToLower(createErr.Error()), "volume is attached on another node") {
		return nil, nstructs.NewRecoverableError(createErr, true)
	}

	// If the container already exists determine whether it's already
	// running or if it's dead and needs to be recreated.
	if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") {

		container, err := d.containerByName(config.Name)
		if err != nil {
			return nil, err
		}

		if container != nil && container.State.Running {
			return container, nil
		}

		// Purge conflicting container if found.
		// If container is nil here, the conflicting container was
		// already deleted by the time we checked, so retry again.
		if container != nil {
			// Delete matching containers
			err = client.RemoveContainer(docker.RemoveContainerOptions{
				ID:    container.ID,
				Force: true,
			})
			if err != nil {
				d.logger.Error("failed to purge container", "container_id", container.ID)
				return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
			}
			d.logger.Info("purged container", "container_id", container.ID)
		}

		if attempted < 5 {
			attempted++
			time.Sleep(nextBackoff(attempted))
			goto CREATE
		}
	} else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") {
		// There is still a very small chance this is possible even with the
		// coordinator so retry.
		return nil, nstructs.NewRecoverableError(createErr, true)
	} else if isDockerTransientError(createErr) && attempted < 5 {
		attempted++
		time.Sleep(nextBackoff(attempted))
		goto CREATE
	}

	return nil, recoverableErrTimeouts(createErr)
}
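
// Note: both createContainer above and startContainer below retry transient
// failures at most five times, sleeping nextBackoff(n) between attempts, so a
// persistently failing create/start gives up after roughly 200ms + 800ms +
// 3.2s + 12.8s + 51.2s (about 68 seconds) of accumulated backoff.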

// startContainer starts the passed container. It attempts to handle any
// transient Docker errors.
func (d *Driver) startContainer(c *docker.Container) error {
	// Start a container
	attempted := 0
START:
	startErr := client.StartContainer(c.ID, c.HostConfig)
	if startErr == nil || strings.Contains(startErr.Error(), "Container already running") {
		return nil
	}

	d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr)

	if isDockerTransientError(startErr) {
		if attempted < 5 {
			attempted++
			time.Sleep(nextBackoff(attempted))
			goto START
		}
		return nstructs.NewRecoverableError(startErr, true)
	}

	return recoverableErrTimeouts(startErr)
}

// nextBackoff returns appropriate docker backoff durations after attempted attempts.
func nextBackoff(attempted int) time.Duration {
	// attempts in 200ms, 800ms, 3.2s, 12.8s, 51.2s
	// TODO: add randomization factor and extract to a helper
	return 1 << (2 * uint64(attempted)) * 50 * time.Millisecond
}

// createImage creates a docker image either by pulling it from a registry or by
// loading it from the file system
func (d *Driver) createImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (string, error) {
	image := driverConfig.Image
	repo, tag := parseDockerImage(image)

	// We're going to check whether the image is already downloaded. If the tag
	// is "latest", or ForcePull is set, we have to check for a new version every
	// time so we don't bother to check and cache the id here. We'll download
	// first, then cache.
	if driverConfig.ForcePull {
		d.logger.Debug("force pulling image instead of inspecting local", "image_ref", dockerImageRef(repo, tag))
	} else if tag != "latest" {
		if dockerImage, _ := client.InspectImage(image); dockerImage != nil {
			// Image exists so just increment its reference count
			d.coordinator.IncrementImageReference(dockerImage.ID, image, task.ID)
			return dockerImage.ID, nil
		}
	}

	// Load the image if specified
	if driverConfig.LoadImage != "" {
		return d.loadImage(task, driverConfig, client)
	}

	// Download the image
	return d.pullImage(task, driverConfig, client, repo, tag)
}

// pullImage creates an image by pulling it from a docker registry
func (d *Driver) pullImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client, repo, tag string) (id string, err error) {
	authOptions, err := d.resolveRegistryAuthentication(driverConfig, repo)
	if err != nil {
		if driverConfig.AuthSoftFail {
			d.logger.Warn("Failed to find docker repo auth", "repo", repo, "error", err)
		} else {
			return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err)
		}
	}

	if authIsEmpty(authOptions) {
		d.logger.Debug("did not find docker auth for repo", "repo", repo)
	}

	d.eventer.EmitEvent(&drivers.TaskEvent{
		TaskID:    task.ID,
		AllocID:   task.AllocID,
		TaskName:  task.Name,
		Timestamp: time.Now(),
		Message:   "Downloading image",
		Annotations: map[string]string{
			"image": dockerImageRef(repo, tag),
		},
	})

	pullDur, err := time.ParseDuration(driverConfig.ImagePullTimeout)
	if err != nil {
		return "", fmt.Errorf("Failed to parse image_pull_timeout: %v", err)
	}

	return d.coordinator.PullImage(driverConfig.Image, authOptions, task.ID, d.emitEventFunc(task), pullDur, d.config.pullActivityTimeoutDuration)
}

func (d *Driver) emitEventFunc(task *drivers.TaskConfig) LogEventFn {
	return func(msg string, annotations map[string]string) {
		d.eventer.EmitEvent(&drivers.TaskEvent{
			TaskID:      task.ID,
			AllocID:     task.AllocID,
			TaskName:    task.Name,
			Timestamp:   time.Now(),
			Message:     msg,
			Annotations: annotations,
		})
	}
}

// authBackend encapsulates a function that resolves registry credentials.
type authBackend func(string) (*docker.AuthConfiguration, error)

// resolveRegistryAuthentication attempts to retrieve auth credentials for the
// repo, trying all authentication-backends possible.
func (d *Driver) resolveRegistryAuthentication(driverConfig *TaskConfig, repo string) (*docker.AuthConfiguration, error) {
	return firstValidAuth(repo, []authBackend{
		authFromTaskConfig(driverConfig),
		authFromDockerConfig(d.config.Auth.Config),
		authFromHelper(d.config.Auth.Helper),
	})
}

// loadImage creates an image by loading it from the file system
func (d *Driver) loadImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (id string, err error) {

	archive := filepath.Join(task.TaskDir().LocalDir, driverConfig.LoadImage)
	d.logger.Debug("loading image from disk", "archive", archive)

	f, err := os.Open(archive)
	if err != nil {
		return "", fmt.Errorf("unable to open image archive: %v", err)
	}

	if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil {
		return "", err
	}
	f.Close()

	dockerImage, err := client.InspectImage(driverConfig.Image)
	if err != nil {
		return "", recoverableErrTimeouts(err)
	}

	d.coordinator.IncrementImageReference(dockerImage.ID, driverConfig.Image, task.ID)
	return dockerImage.ID, nil
}

func (d *Driver) convertAllocPathsForWindowsLCOW(task *drivers.TaskConfig, image string) error {
	imageConfig, err := client.InspectImage(image)
	if err != nil {
		return fmt.Errorf("the image does not exist: %v", err)
	}
	// LCOW: if we are running a Linux Container on Windows, we need to mount it
	// correctly, as c:\ does not exist on unix
	if imageConfig.OS == "linux" {
		a := []rune(task.Env[taskenv.AllocDir])
		task.Env[taskenv.AllocDir] = strings.ReplaceAll(string(a[2:]), "\\", "/")
		l := []rune(task.Env[taskenv.TaskLocalDir])
		task.Env[taskenv.TaskLocalDir] = strings.ReplaceAll(string(l[2:]), "\\", "/")
		s := []rune(task.Env[taskenv.SecretsDir])
		task.Env[taskenv.SecretsDir] = strings.ReplaceAll(string(s[2:]), "\\", "/")
	}
	return nil
}

func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConfig) ([]string, error) {
	allocDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SharedAllocDir, task.Env[taskenv.AllocDir])
	taskLocalBind := fmt.Sprintf("%s:%s", task.TaskDir().LocalDir, task.Env[taskenv.TaskLocalDir])
	secretDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SecretsDir, task.Env[taskenv.SecretsDir])
	binds := []string{allocDirBind, taskLocalBind, secretDirBind}

	taskLocalBindVolume := driverConfig.VolumeDriver == ""

	if !d.config.Volumes.Enabled && !taskLocalBindVolume {
		return nil, fmt.Errorf("volumes are not enabled; cannot use volume driver %q", driverConfig.VolumeDriver)
	}

	for _, userbind := range driverConfig.Volumes {
		// This assumes the host OS matches the docker container OS, which will
		// not hold once we support Linux containers on Windows.
		src, dst, mode, err := parseVolumeSpec(userbind, runtime.GOOS)
		if err != nil {
			return nil, fmt.Errorf("invalid docker volume %q: %v", userbind, err)
		}

		// Paths inside the task dir are always allowed when using the default
		// driver, and relative paths are always allowed as they mount within
		// the container.
		// When a VolumeDriver is set, we assume we receive a binding in the format
		// volume-name:container-dest.
		// Otherwise, we assume we receive a relative path binding in the format
		// relative/to/task:/also/in/container
		if taskLocalBindVolume {
			src = expandPath(task.TaskDir().Dir, src)
		} else {
			// Resolve dotted path segments
			src = filepath.Clean(src)
		}

		if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, src) {
			return nil, fmt.Errorf("volumes are not enabled; cannot mount host paths: %+q", userbind)
		}

		bind := src + ":" + dst
		if mode != "" {
			bind += ":" + mode
		}
		binds = append(binds, bind)
	}

	if selinuxLabel := d.config.Volumes.SelinuxLabel; selinuxLabel != "" {
		// Apply SELinux Label to each volume
		for i := range binds {
			binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel)
		}
	}

	return binds, nil
}

var userMountToUnixMount = map[string]string{
	// Empty string maps to `rprivate` for backwards compatibility in restored
	// older tasks, where mount propagation will not be present.
	"": "rprivate",
	nstructs.VolumeMountPropagationPrivate:       "rprivate",
	nstructs.VolumeMountPropagationHostToTask:    "rslave",
	nstructs.VolumeMountPropagationBidirectional: "rshared",
}

// parseSecurityOpts takes a local seccomp profile path and reads the file
// contents for sending to the daemon. This code is modified slightly from the
// docker CLI code:
// https://github.com/docker/cli/blob/8ef8547eb6934b28497d309d21e280bcd25145f5/cli/command/container/opts.go#L840
func parseSecurityOpts(securityOpts []string) ([]string, error) {
	for key, opt := range securityOpts {
		con := strings.SplitN(opt, "=", 2)
		if len(con) == 1 && con[0] != "no-new-privileges" {
			if strings.Contains(opt, ":") {
				con = strings.SplitN(opt, ":", 2)
			} else {
				return securityOpts, fmt.Errorf("invalid security_opt: %q", opt)
			}
		}
		if con[0] == "seccomp" && con[1] != "unconfined" {
			f, err := ioutil.ReadFile(con[1])
			if err != nil {
				return securityOpts, fmt.Errorf("opening seccomp profile (%s) failed: %v", con[1], err)
			}
			b := bytes.NewBuffer(nil)
			if err := json.Compact(b, f); err != nil {
				return securityOpts, fmt.Errorf("compacting json for seccomp profile (%s) failed: %v", con[1], err)
			}
			securityOpts[key] = fmt.Sprintf("seccomp=%s", b.Bytes())
		}
	}

	return securityOpts, nil
}
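
// For example, a security_opt entry such as "seccomp=./local/profile.json" (an
// illustrative path) is rewritten above to "seccomp=<compacted JSON contents of
// that file>", while entries like "no-new-privileges" or "seccomp=unconfined"
// pass through unchanged.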

// memoryLimits computes the memory and memory_reservation values passed along to
// the docker host config. These fields represent hard and soft memory limits from
// docker's perspective, respectively.
//
// The memory field on the task configuration can be interpreted as a hard or soft
// limit. Before Nomad v0.11.3, it was always a hard limit. Now, it is interpreted
// as a soft limit if the memory_hard_limit value is configured on the docker
// task driver configuration. When memory_hard_limit is set, the docker host
// config is configured such that the memory field is equal to the memory_hard_limit
// value, and the memory_reservation field is set to the task driver memory value.
//
// If memory_hard_limit is not set (i.e. zero value), then the memory field of
// the task resource config is interpreted as a hard limit. In this case the
// memory field is set to the task resource memory value and memory_reservation
// is left unset.
//
// Returns (memory (hard), memory_reservation (soft)) values in bytes.
func (_ *Driver) memoryLimits(driverHardLimitMB, taskMemoryLimitBytes int64) (int64, int64) {
	if driverHardLimitMB <= 0 {
		return taskMemoryLimitBytes, 0
	}
	return driverHardLimitMB * 1024 * 1024, taskMemoryLimitBytes
}

func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
	imageID string) (docker.CreateContainerOptions, error) {

	// ensure that PortMap variables are populated early on
	task.Env = taskenv.SetPortMapEnvs(task.Env, driverConfig.PortMap)

	logger := d.logger.With("task_name", task.Name)
	var c docker.CreateContainerOptions
	if task.Resources == nil {
		// Guard against missing resources. We should never have been able to
		// schedule a job without specifying this.
		logger.Error("task.Resources is empty")
		return c, fmt.Errorf("task.Resources is empty")
	}
	binds, err := d.containerBinds(task, driverConfig)
	if err != nil {
		return c, err
	}
	logger.Trace("binding volumes", "volumes", binds)

	// create the config block that will later be consumed by go-dockerclient
	config := &docker.Config{
		Image:      imageID,
		Entrypoint: driverConfig.Entrypoint,
		Hostname:   driverConfig.Hostname,
		User:       task.User,
		Tty:        driverConfig.TTY,
		OpenStdin:  driverConfig.Interactive,
	}

	if driverConfig.WorkDir != "" {
		config.WorkingDir = driverConfig.WorkDir
	}

	containerRuntime := driverConfig.Runtime
	if _, ok := task.DeviceEnv[nvidiaVisibleDevices]; ok {
		if !d.gpuRuntime {
			return c, fmt.Errorf("requested docker runtime %q was not found", d.config.GPURuntimeName)
		}
		if containerRuntime != "" && containerRuntime != d.config.GPURuntimeName {
			return c, fmt.Errorf("conflicting runtime requests: gpu runtime %q conflicts with task runtime %q", d.config.GPURuntimeName, containerRuntime)
		}
		containerRuntime = d.config.GPURuntimeName
	}
	if _, ok := d.config.allowRuntimes[containerRuntime]; !ok && containerRuntime != "" {
		return c, fmt.Errorf("requested runtime %q is not allowed", containerRuntime)
	}

	memory, memoryReservation := d.memoryLimits(driverConfig.MemoryHardLimit, task.Resources.LinuxResources.MemoryLimitBytes)

	hostConfig := &docker.HostConfig{
		Memory:            memory,            // hard limit
		MemoryReservation: memoryReservation, // soft limit

		CPUShares: task.Resources.LinuxResources.CPUShares,

		// Binds are used to mount a host volume into the container. We mount a
		// local directory for storage and a shared alloc directory that can be
		// used to share data between different tasks in the same task group.
		Binds: binds,

		StorageOpt:   driverConfig.StorageOpt,
		VolumeDriver: driverConfig.VolumeDriver,

		PidsLimit: &driverConfig.PidsLimit,

		Runtime: containerRuntime,
	}

	// This translates to the docker create/run --cpuset-cpus option, which
	// limits the specific CPUs or cores a container can use.
	if driverConfig.CPUSetCPUs != "" {
		hostConfig.CPUSetCPUs = driverConfig.CPUSetCPUs
	}

	// Calculate CPU Quota
	// cfs_quota_us is the time per core, so we must
	// multiply the time by the number of cores available
	// See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu
	if driverConfig.CPUHardLimit {
		numCores := runtime.NumCPU()
		if driverConfig.CPUCFSPeriod < 0 || driverConfig.CPUCFSPeriod > 1000000 {
			return c, fmt.Errorf("invalid value for cpu_cfs_period")
		}
		if driverConfig.CPUCFSPeriod == 0 {
			driverConfig.CPUCFSPeriod = task.Resources.LinuxResources.CPUPeriod
		}
		hostConfig.CPUPeriod = driverConfig.CPUCFSPeriod
		hostConfig.CPUQuota = int64(task.Resources.LinuxResources.PercentTicks*float64(driverConfig.CPUCFSPeriod)) * int64(numCores)
	}

	// Windows does not support MemorySwap/MemorySwappiness #2193
	if runtime.GOOS == "windows" {
		hostConfig.MemorySwap = 0
		hostConfig.MemorySwappiness = nil
	} else {
		hostConfig.MemorySwap = memory

		// disable swap explicitly in non-Windows environments
		var swapiness int64 = 0
		hostConfig.MemorySwappiness = &swapiness
	}

	loggingDriver := driverConfig.Logging.Type
	if loggingDriver == "" {
		loggingDriver = driverConfig.Logging.Driver
	}

	hostConfig.LogConfig = docker.LogConfig{
		Type:   loggingDriver,
		Config: driverConfig.Logging.Config,
	}

	if hostConfig.LogConfig.Type == "" && hostConfig.LogConfig.Config == nil {
		logger.Trace("no docker log driver provided, defaulting to json-file")
		hostConfig.LogConfig.Type = "json-file"
		hostConfig.LogConfig.Config = map[string]string{
			"max-file": "2",
			"max-size": "2m",
		}
	}

	logger.Debug("configured resources",
		"memory", hostConfig.Memory, "memory_reservation", hostConfig.MemoryReservation,
		"cpu_shares", hostConfig.CPUShares, "cpu_quota", hostConfig.CPUQuota,
		"cpu_period", hostConfig.CPUPeriod)

	logger.Debug("binding directories", "binds", hclog.Fmt("%#v", hostConfig.Binds))

	// set privileged mode
	if driverConfig.Privileged && !d.config.AllowPrivileged {
		return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`)
	}
	hostConfig.Privileged = driverConfig.Privileged

	// set capabilities
	hostCapsWhitelistConfig := d.config.AllowCaps
	hostCapsWhitelist := make(map[string]struct{})
	for _, cap := range hostCapsWhitelistConfig {
		cap = strings.ToLower(strings.TrimSpace(cap))
		hostCapsWhitelist[cap] = struct{}{}
	}

	if _, ok := hostCapsWhitelist["all"]; !ok {
		effectiveCaps, err := tweakCapabilities(
			strings.Split(dockerBasicCaps, ","),
			driverConfig.CapAdd,
			driverConfig.CapDrop,
		)
		if err != nil {
			return c, err
		}
		var missingCaps []string
		for _, cap := range effectiveCaps {
			cap = strings.ToLower(cap)
			if _, ok := hostCapsWhitelist[cap]; !ok {
				missingCaps = append(missingCaps, cap)
			}
		}
		if len(missingCaps) > 0 {
			return c, fmt.Errorf("Docker driver doesn't have the following caps allowlisted on this Nomad agent: %s", missingCaps)
		}
	}

	hostConfig.CapAdd = driverConfig.CapAdd
	hostConfig.CapDrop = driverConfig.CapDrop

	// set SHM size
	if driverConfig.ShmSize != 0 {
		hostConfig.ShmSize = driverConfig.ShmSize
	}

	// Setup devices
	for _, device := range driverConfig.Devices {
		dd, err := device.toDockerDevice()
		if err != nil {
			return c, err
		}
		hostConfig.Devices = append(hostConfig.Devices, dd)
	}
	for _, device := range task.Devices {
		hostConfig.Devices = append(hostConfig.Devices, docker.Device{
			PathOnHost:        device.HostPath,
			PathInContainer:   device.TaskPath,
			CgroupPermissions: device.Permissions,
		})
	}

	// Setup mounts
	for _, m := range driverConfig.Mounts {
		hm, err := d.toDockerMount(&m, task)
		if err != nil {
			return c, err
		}
		hostConfig.Mounts = append(hostConfig.Mounts, *hm)
	}
	for _, m := range driverConfig.MountsList {
		hm, err := d.toDockerMount(&m, task)
		if err != nil {
			return c, err
		}
		hostConfig.Mounts = append(hostConfig.Mounts, *hm)
	}

	// Setup DNS.
	// If task DNS options are configured, Nomad will manage the resolv.conf file;
	// Docker driver dns options are not compatible with task dns options.
	if task.DNS != nil {
		dnsMount, err := resolvconf.GenerateDNSMount(task.TaskDir().Dir, task.DNS)
		if err != nil {
			return c, fmt.Errorf("failed to build mount for resolv.conf: %v", err)
		}
		hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{
			Target:   dnsMount.TaskPath,
			Source:   dnsMount.HostPath,
			Type:     "bind",
			ReadOnly: dnsMount.Readonly,
			BindOptions: &docker.BindOptions{
				Propagation: dnsMount.PropagationMode,
			},
		})
	} else {
		if len(driverConfig.DNSSearchDomains) > 0 {
			hostConfig.DNSSearch = driverConfig.DNSSearchDomains
		}
		if len(driverConfig.DNSOptions) > 0 {
			hostConfig.DNSOptions = driverConfig.DNSOptions
		}
		// set DNS servers
		for _, ip := range driverConfig.DNSServers {
			if net.ParseIP(ip) != nil {
				hostConfig.DNS = append(hostConfig.DNS, ip)
			} else {
				logger.Error("invalid ip address for container dns server", "ip", ip)
			}
		}
	}

	for _, m := range task.Mounts {
		hm := docker.HostMount{
			Type:     "bind",
			Target:   m.TaskPath,
			Source:   m.HostPath,
			ReadOnly: m.Readonly,
		}

		// MountPropagation is only supported by Docker on Linux:
		// https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation
		if runtime.GOOS == "linux" {
			hm.BindOptions = &docker.BindOptions{
				Propagation: userMountToUnixMount[m.PropagationMode],
			}
		}

		hostConfig.Mounts = append(hostConfig.Mounts, hm)
	}

	hostConfig.ExtraHosts = driverConfig.ExtraHosts

	hostConfig.IpcMode = driverConfig.IPCMode
	hostConfig.PidMode = driverConfig.PidMode
	hostConfig.UTSMode = driverConfig.UTSMode
	hostConfig.UsernsMode = driverConfig.UsernsMode
	hostConfig.SecurityOpt = driverConfig.SecurityOpt
	hostConfig.Sysctls = driverConfig.Sysctl

	hostConfig.SecurityOpt, err = parseSecurityOpts(driverConfig.SecurityOpt)
	if err != nil {
		return c, fmt.Errorf("failed to parse security_opt configuration: %v", err)
	}

	ulimits, err := sliceMergeUlimit(driverConfig.Ulimit)
	if err != nil {
		return c, fmt.Errorf("failed to parse ulimit configuration: %v", err)
	}
	hostConfig.Ulimits = ulimits

	hostConfig.ReadonlyRootfs = driverConfig.ReadonlyRootfs

	// set the docker network mode
	hostConfig.NetworkMode = driverConfig.NetworkMode

	// if the driver config does not specify a network mode then try to use the
	// shared alloc network
	if hostConfig.NetworkMode == "" {
		if task.NetworkIsolation != nil && task.NetworkIsolation.Path != "" {
			// find the previously created parent container to join networks with
			netMode := fmt.Sprintf("container:%s", task.NetworkIsolation.Labels[dockerNetSpecLabelKey])
			logger.Debug("configuring network mode for task group", "network_mode", netMode)
			hostConfig.NetworkMode = netMode
		} else {
			// docker default
			logger.Debug("networking mode not specified; using default")
			hostConfig.NetworkMode = "default"
		}
	}

	// Setup port mapping and exposed ports
	ports := newPublishedPorts(logger)
	switch {
	case task.Resources.Ports != nil && len(driverConfig.Ports) > 0:
		// Do not set up docker port mapping if shared alloc networking is used
		if strings.HasPrefix(hostConfig.NetworkMode, "container:") {
			break
		}

		for _, port := range driverConfig.Ports {
			if mapping, ok := task.Resources.Ports.Get(port); ok {
				ports.add(mapping.Label, mapping.HostIP, mapping.Value, mapping.To)
			} else {
				return c, fmt.Errorf("Port %q not found, check network stanza", port)
			}
		}
	case len(task.Resources.NomadResources.Networks) > 0:
		network := task.Resources.NomadResources.Networks[0]

		for _, port := range network.ReservedPorts {
			ports.addMapped(port.Label, network.IP, port.Value, driverConfig.PortMap)
		}

		for _, port := range network.DynamicPorts {
			ports.addMapped(port.Label, network.IP, port.Value, driverConfig.PortMap)
		}

	default:
		if len(driverConfig.PortMap) > 0 {
			if task.Resources.Ports != nil {
				return c, fmt.Errorf("'port_map' cannot map group network ports, use 'ports' instead")
			}
			return c, fmt.Errorf("Trying to map ports but no network interface is available")
		}
	}
	hostConfig.PortBindings = ports.publishedPorts
	config.ExposedPorts = ports.exposedPorts

	// If the user specified a custom command to run, we'll inject it here.
	if driverConfig.Command != "" {
		// Validate command
		if err := validateCommand(driverConfig.Command, "args"); err != nil {
			return c, err
		}

		cmd := []string{driverConfig.Command}
		if len(driverConfig.Args) != 0 {
			cmd = append(cmd, driverConfig.Args...)
		}
		logger.Debug("setting container startup command", "command", strings.Join(cmd, " "))
		config.Cmd = cmd
	} else if len(driverConfig.Args) != 0 {
		config.Cmd = driverConfig.Args
	}

	if len(driverConfig.Labels) > 0 {
		config.Labels = driverConfig.Labels
	}

	labels := make(map[string]string, len(driverConfig.Labels)+1)
	for k, v := range driverConfig.Labels {
		labels[k] = v
	}
	labels[dockerLabelAllocID] = task.AllocID
	config.Labels = labels
	logger.Debug("applied labels on the container", "labels", config.Labels)

	config.Env = task.EnvList()

	containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
	logger.Debug("setting container name", "container_name", containerName)

	var networkingConfig *docker.NetworkingConfig
	if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" {
		networkingConfig = &docker.NetworkingConfig{
			EndpointsConfig: map[string]*docker.EndpointConfig{
				hostConfig.NetworkMode: {},
			},
		}
	}

	if len(driverConfig.NetworkAliases) > 0 {
		networkingConfig.EndpointsConfig[hostConfig.NetworkMode].Aliases = driverConfig.NetworkAliases
		logger.Debug("setting container network aliases", "network_mode", hostConfig.NetworkMode,
			"network_aliases", strings.Join(driverConfig.NetworkAliases, ", "))
	}

	if driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" {
		networkingConfig.EndpointsConfig[hostConfig.NetworkMode].IPAMConfig = &docker.EndpointIPAMConfig{
			IPv4Address: driverConfig.IPv4Address,
			IPv6Address: driverConfig.IPv6Address,
		}
		logger.Debug("setting container network configuration", "network_mode", hostConfig.NetworkMode,
			"ipv4_address", driverConfig.IPv4Address, "ipv6_address", driverConfig.IPv6Address)
	}

	if driverConfig.MacAddress != "" {
		config.MacAddress = driverConfig.MacAddress
		logger.Debug("setting container mac address", "mac_address", config.MacAddress)
	}

	return docker.CreateContainerOptions{
		Name:             containerName,
		Config:           config,
		HostConfig:       hostConfig,
		NetworkingConfig: networkingConfig,
	}, nil
}

func (d *Driver) toDockerMount(m *DockerMount, task *drivers.TaskConfig) (*docker.HostMount, error) {
	hm, err := m.toDockerHostMount()
	if err != nil {
		return nil, err
	}

	switch hm.Type {
	case "bind":
		hm.Source = expandPath(task.TaskDir().Dir, hm.Source)

		// paths inside alloc dir are always allowed as they mount within
		// a container, and treated as relative to task dir
		if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, hm.Source) {
			return nil, fmt.Errorf(
				"volumes are not enabled; cannot mount host path: %q %q",
				hm.Source, task.AllocDir)
		}
	case "tmpfs":
		// no source, so no sandbox check required
	default: // "volume", but also any new thing that comes along
		if !d.config.Volumes.Enabled {
			return nil, fmt.Errorf(
				"volumes are not enabled; cannot mount volume: %q", hm.Source)
		}
	}

	return &hm, nil
}

// detectIP of Docker container. Returns the first IP found as well as true if
// the IP should be advertised (bridge network IPs return false). Returns an
// empty string and false if no IP could be found.
func (d *Driver) detectIP(c *docker.Container, driverConfig *TaskConfig) (string, bool) {
	if c.NetworkSettings == nil {
		// This should only happen if there's been a coding error (such
		// as not calling InspectContainer after CreateContainer). Code
		// defensively in case the Docker API changes subtly.
		d.logger.Error("no network settings for container", "container_id", c.ID)
		return "", false
	}

	ip, ipName := "", ""
	auto := false
	for name, net := range c.NetworkSettings.Networks {
		if net.IPAddress == "" {
			// Ignore networks without an IP address
			continue
		}

		ip = net.IPAddress
		if driverConfig.AdvertiseIPv6Addr {
			ip = net.GlobalIPv6Address
			auto = true
		}
		ipName = name

		// Don't auto-advertise IPs for default networks (bridge on
		// Linux, nat on Windows)
		if name != "bridge" && name != "nat" {
			auto = true
		}

		break
	}

	if n := len(c.NetworkSettings.Networks); n > 1 {
		d.logger.Warn("multiple Docker networks for container found but Nomad only supports 1",
			"total_networks", n,
			"container_id", c.ID,
			"container_network", ipName)
	}

	return ip, auto
}

// containerByName finds a running container by name, and returns an error
// if the container is dead or can't be found.
func (d *Driver) containerByName(name string) (*docker.Container, error) {

	client, _, err := d.dockerClients()
	if err != nil {
		return nil, err
	}
	containers, err := client.ListContainers(docker.ListContainersOptions{
		All: true,
	})
	if err != nil {
		d.logger.Error("failed to query list of containers matching name",
			"container_name", name)
		return nil, recoverableErrTimeouts(
			fmt.Errorf("Failed to query list of containers: %s", err))
	}

	// Docker prepends a "/" to the Nomad generated container names
	containerName := "/" + name
	var (
		shimContainer docker.APIContainers
		found         bool
	)
OUTER:
	for _, shimContainer = range containers {
		d.logger.Trace("listed container", "names", hclog.Fmt("%+v", shimContainer.Names))
		for _, name := range shimContainer.Names {
			if name == containerName {
				d.logger.Trace("Found container",
					"container_name", containerName, "container_id", shimContainer.ID)
				found = true
				break OUTER
			}
		}
	}
	if !found {
		return nil, nil
	}

	container, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{
		ID: shimContainer.ID,
	})
	if err != nil {
		err = fmt.Errorf("Failed to inspect container %s: %s", shimContainer.ID, err)

		// This error is always recoverable as it could
		// be caused by races between listing
		// containers and this container being removed.
		// See #2802
		return nil, nstructs.NewRecoverableError(err, true)
	}
	return container, nil
}

// validateCommand validates that the command only has a single value and
// returns a user friendly error message telling them to use the passed
// argField.
func validateCommand(command, argField string) error {
	trimmed := strings.TrimSpace(command)
	if len(trimmed) == 0 {
		return fmt.Errorf("command empty: %q", command)
	}

	if len(trimmed) != len(command) {
		return fmt.Errorf("command contains extra white space: %q", command)
	}

	return nil
}

func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
	h, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}
	ch := make(chan *drivers.ExitResult)
	go d.handleWait(ctx, ch, h)
	return ch, nil
}

func (d *Driver) handleWait(ctx context.Context, ch chan *drivers.ExitResult, h *taskHandle) {
	defer close(ch)
	select {
	case <-h.waitCh:
		ch <- h.ExitResult()
	case <-ctx.Done():
		ch <- &drivers.ExitResult{
			Err: ctx.Err(),
		}
	}
}

// parseSignal interprets the signal name into an os.Signal. If no name is
// provided, the docker driver defaults to SIGTERM. If the OS is Windows and
// SIGINT is provided, the signal is converted to SIGTERM.
func (d *Driver) parseSignal(os, signal string) (os.Signal, error) {
	// Unlike other drivers, docker defaults to SIGTERM, aiming for consistency
	// with the 'docker stop' command.
	// https://docs.docker.com/engine/reference/commandline/stop/#extended-description
	if signal == "" {
		signal = "SIGTERM"
	}

	// Windows Docker daemon does not support SIGINT, SIGTERM is the semantic equivalent that
	// allows for graceful shutdown before being followed up by a SIGKILL.
	// Supported signals:
	// https://github.com/moby/moby/blob/0111ee70874a4947d93f64b672f66a2a35071ee2/pkg/signal/signal_windows.go#L17-L26
	if os == "windows" && signal == "SIGINT" {
		signal = "SIGTERM"
	}

	return signals.Parse(signal)
}

func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
	h, ok := d.tasks.Get(taskID)
	if !ok {
		return drivers.ErrTaskNotFound
	}

	sig, err := d.parseSignal(runtime.GOOS, signal)
	if err != nil {
		return fmt.Errorf("failed to parse signal: %v", err)
	}

	return h.Kill(timeout, sig)
}

func (d *Driver) DestroyTask(taskID string, force bool) error {
	h, ok := d.tasks.Get(taskID)
	if !ok {
		return drivers.ErrTaskNotFound
	}

	c, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{
		ID: h.containerID,
	})
	if err != nil {
		switch err.(type) {
		case *docker.NoSuchContainer:
			h.logger.Info("container was removed out of band, will proceed with DestroyTask",
				"error", err)
		default:
			return fmt.Errorf("failed to inspect container state: %v", err)
		}
	} else {
		if c.State.Running {
			if !force {
				return fmt.Errorf("must call StopTask for the given task before Destroy or set force to true")
			}
			if err := h.client.StopContainer(h.containerID, 0); err != nil {
				h.logger.Warn("failed to stop container during destroy", "error", err)
			}
		}

		if h.removeContainerOnExit {
			if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil {
				h.logger.Error("error removing container", "error", err)
			}
		} else {
			h.logger.Debug("not removing container due to config")
		}
	}

	if err := d.cleanupImage(h); err != nil {
		h.logger.Error("failed to cleanup image after destroying container",
			"error", err)
	}

	d.tasks.Delete(taskID)
	return nil
}

// cleanupImage removes a Docker image. No error is returned if the image
// doesn't exist or is still in use. Requires the global client to already be
// initialized.
func (d *Driver) cleanupImage(handle *taskHandle) error {
	if !d.config.GC.Image {
		return nil
	}

	d.coordinator.RemoveImage(handle.containerImage, handle.task.ID)

	return nil
}

func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
	h, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}

	container, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{
		ID: h.containerID,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to inspect container %q: %v", h.containerID, err)
	}
	status := &drivers.TaskStatus{
		ID:          h.task.ID,
		Name:        h.task.Name,
		StartedAt:   container.State.StartedAt,
		CompletedAt: container.State.FinishedAt,
		DriverAttributes: map[string]string{
			"container_id": container.ID,
		},
		NetworkOverride: h.net,
		ExitResult:      h.ExitResult(),
	}

	status.State = drivers.TaskStateUnknown
	if container.State.Running {
		status.State = drivers.TaskStateRunning
	}
	if container.State.Dead {
		status.State = drivers.TaskStateExited
	}

	return status, nil
}

func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
	h, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}

	return h.Stats(ctx, interval)
}

func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
	return d.eventer.TaskEvents(ctx)
}

func (d *Driver) SignalTask(taskID string, signal string) error {
	h, ok := d.tasks.Get(taskID)
	if !ok {
		return drivers.ErrTaskNotFound
	}

	sig, err := signals.Parse(signal)
	if err != nil {
		return fmt.Errorf("failed to parse signal: %v", err)
	}

	// TODO: review whether we can timeout in this and other Docker API
	// calls without breaking the expected client behavior.
	// see https://github.com/hashicorp/nomad/issues/9503
	return h.Signal(context.Background(), sig)
}

func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
	h, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}

	if len(cmd) == 0 {
		return nil, fmt.Errorf("cmd is required, but was empty")
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	return h.Exec(ctx, cmd[0], cmd[1:])
}

var _ drivers.ExecTaskStreamingDriver = (*Driver)(nil)

func (d *Driver) ExecTaskStreaming(ctx context.Context, taskID string, opts *drivers.ExecOptions) (*drivers.ExitResult, error) {
	defer opts.Stdout.Close()
	defer opts.Stderr.Close()

	done := make(chan interface{})
	defer close(done)

	h, ok := d.tasks.Get(taskID)
	if !ok {
		return nil, drivers.ErrTaskNotFound
	}

	if len(opts.Command) == 0 {
		return nil, fmt.Errorf("command is required but was empty")
	}

	createExecOpts := docker.CreateExecOptions{
		AttachStdin:  true,
		AttachStdout: true,
		AttachStderr: true,
		Tty:          opts.Tty,
		Cmd:          opts.Command,
		Container:    h.containerID,
		Context:      ctx,
	}
	exec, err := h.client.CreateExec(createExecOpts)
	if err != nil {
		return nil, fmt.Errorf("failed to create exec object: %v", err)
	}

	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case <-done:
				return
			case s, ok := <-opts.ResizeCh:
				if !ok {
					return
				}
				client.ResizeExecTTY(exec.ID, s.Height, s.Width)
			}
		}
	}()

	startOpts := docker.StartExecOptions{
		Detach: false,

		// When running in TTY, we must use a raw terminal.
		// If not, we set RawTerminal to false to allow docker client
		// to interpret special stdout/stderr messages
		Tty:         opts.Tty,
		RawTerminal: opts.Tty,

		InputStream:  opts.Stdin,
		OutputStream: opts.Stdout,
		ErrorStream:  opts.Stderr,
		Context:      ctx,
	}
	if err := client.StartExec(exec.ID, startOpts); err != nil {
		return nil, fmt.Errorf("failed to start exec: %v", err)
	}

	// StartExec returns after the process completes, but InspectExec seems to
	// have a delay in getting the status code
	const execTerminatingTimeout = 3 * time.Second
	start := time.Now()
	var res *docker.ExecInspect
	for (res == nil || res.Running) && time.Since(start) <= execTerminatingTimeout {
		res, err = client.InspectExec(exec.ID)
		if err != nil {
			return nil, fmt.Errorf("failed to inspect exec result: %v", err)
		}
		time.Sleep(50 * time.Millisecond)
	}

	if res == nil || res.Running {
		return nil, fmt.Errorf("failed to retrieve exec result")
	}

	return &drivers.ExitResult{
		ExitCode: res.ExitCode,
	}, nil
}

// dockerClients creates two *docker.Client, one for long running operations and
// the other for shorter operations. In test / dev mode we can use ENV vars to
// connect to the docker daemon. In production mode we will read docker.endpoint
// from the config file.
func (d *Driver) dockerClients() (*docker.Client, *docker.Client, error) {
	createClientsLock.Lock()
	defer createClientsLock.Unlock()

	if client != nil && waitClient != nil {
		return client, waitClient, nil
	}

	var err error

	// Only initialize the client if it hasn't yet been done
	if client == nil {
		client, err = d.newDockerClient(dockerTimeout)
		if err != nil {
			return nil, nil, err
		}
	}

	// Only initialize the waitClient if it hasn't yet been done
	if waitClient == nil {
		waitClient, err = d.newDockerClient(0 * time.Minute)
		if err != nil {
			return nil, nil, err
		}
	}

	return client, waitClient, nil
}

// newDockerClient creates a new *docker.Client with a configurable timeout
func (d *Driver) newDockerClient(timeout time.Duration) (*docker.Client, error) {
	var err error
	var merr multierror.Error
	var newClient *docker.Client

	// Default to using whatever is configured in docker.endpoint. If this is
	// not specified we'll fall back on NewClientFromEnv which reads config from
	// the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and
	// DOCKER_CERT_PATH. This allows us to lock down the config in production
	// but also accept the standard ENV configs for dev and test.
	dockerEndpoint := d.config.Endpoint
	if dockerEndpoint != "" {
		cert := d.config.TLS.Cert
		key := d.config.TLS.Key
		ca := d.config.TLS.CA

		if cert+key+ca != "" {
			d.logger.Debug("using TLS client connection", "endpoint", dockerEndpoint)
			newClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca)
			if err != nil {
				merr.Errors = append(merr.Errors, err)
			}
		} else {
			d.logger.Debug("using standard client connection", "endpoint", dockerEndpoint)
			newClient, err = docker.NewClient(dockerEndpoint)
			if err != nil {
				merr.Errors = append(merr.Errors, err)
			}
		}
	} else {
		d.logger.Debug("using client connection initialized from environment")
		newClient, err = docker.NewClientFromEnv()
		if err != nil {
			merr.Errors = append(merr.Errors, err)
		}
	}

	if timeout != 0 && newClient != nil {
		newClient.SetTimeout(timeout)
	}
	return newClient, merr.ErrorOrNil()
}

func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) {
	var ulimits []docker.ULimit

	for name, ulimitRaw := range ulimitsRaw {
		if len(ulimitRaw) == 0 {
			return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %q, cannot be empty", name, ulimitRaw)
		}
		// hard limit is optional
		if !strings.Contains(ulimitRaw, ":") {
			ulimitRaw = ulimitRaw + ":" + ulimitRaw
		}

		splitted := strings.SplitN(ulimitRaw, ":", 2)
		if len(splitted) < 2 {
			return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %v", name, ulimitRaw)
		}
		soft, err := strconv.Atoi(splitted[0])
		if err != nil {
			return []docker.ULimit{}, fmt.Errorf("Malformed soft ulimit %v: %v", name, ulimitRaw)
		}
		hard, err := strconv.Atoi(splitted[1])
		if err != nil {
			return []docker.ULimit{}, fmt.Errorf("Malformed hard ulimit %v: %v", name, ulimitRaw)
		}

		ulimit := docker.ULimit{
			Name: name,
			Soft: int64(soft),
			Hard: int64(hard),
		}
		ulimits = append(ulimits, ulimit)
	}
	return ulimits, nil
}
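
// For example, a ulimit map entry of "nofile" -> "4096:8192" (illustrative
// values) yields docker.ULimit{Name: "nofile", Soft: 4096, Hard: 8192}, while a
// bare "4096" sets both the soft and hard limits to 4096.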

func isDockerTransientError(err error) bool {
	if err == nil {
		return false
	}

	errMsg := err.Error()
	for _, te := range dockerTransientErrs {
		if strings.Contains(errMsg, te) {
			return true
		}
	}

	return false
}
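
// As an illustrative sketch only (not part of the driver), the retry schedule
// used alongside isDockerTransientError can be verified by printing nextBackoff
// for each attempt; printBackoffSchedule is a hypothetical helper shown purely
// for demonstration:
//
//	func printBackoffSchedule() {
//		for attempt := 1; attempt <= 5; attempt++ {
//			// 1<<(2*attempt) * 50ms => 200ms, 800ms, 3.2s, 12.8s, 51.2s
//			fmt.Println(attempt, nextBackoff(attempt))
//		}
//	}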