github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/drivers/docker/driver.go

package docker

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"time"

	docker "github.com/fsouza/go-dockerclient"
	"github.com/hashicorp/consul-template/signals"
	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	plugin "github.com/hashicorp/go-plugin"
	"github.com/hashicorp/nomad/client/lib/cgutil"
	"github.com/hashicorp/nomad/client/taskenv"
	"github.com/hashicorp/nomad/drivers/docker/docklog"
	"github.com/hashicorp/nomad/drivers/shared/capabilities"
	"github.com/hashicorp/nomad/drivers/shared/eventer"
	"github.com/hashicorp/nomad/drivers/shared/hostnames"
	"github.com/hashicorp/nomad/drivers/shared/resolvconf"
	nstructs "github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/base"
	"github.com/hashicorp/nomad/plugins/drivers"
	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
	"github.com/ryanuber/go-glob"
)

var (
	// createClientsLock is a lock that protects reading/writing global client
	// variables
	createClientsLock sync.Mutex

	// client is a docker client with a timeout of 5 minutes. It is used for
	// all docker daemon operations that are not long running, such as
	// creating and killing containers.
	client *docker.Client

	// waitClient is a docker client with no timeouts. It is used for long
	// running operations such as waiting on containers and collecting stats.
	waitClient *docker.Client

	dockerTransientErrs = []string{
		"Client.Timeout exceeded while awaiting headers",
		"EOF",
		"API error (500)",
	}

	// recoverableErrTimeouts returns a recoverable error if the error was due
	// to timeouts
	recoverableErrTimeouts = func(err error) error {
		r := false
		if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") ||
			strings.Contains(err.Error(), "EOF") {
			r = true
		}
		return nstructs.NewRecoverableError(err, r)
	}

	// taskHandleVersion is the version of the task handle that this driver
	// sets and knows how to decode back into driver state
	taskHandleVersion = 1

	// Nvidia-container-runtime environment variable names
	nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
)

const (
	dockerLabelAllocID       = "com.hashicorp.nomad.alloc_id"
	dockerLabelJobName       = "com.hashicorp.nomad.job_name"
	dockerLabelJobID         = "com.hashicorp.nomad.job_id"
	dockerLabelTaskGroupName = "com.hashicorp.nomad.task_group_name"
	dockerLabelTaskName      = "com.hashicorp.nomad.task_name"
	dockerLabelNamespace     = "com.hashicorp.nomad.namespace"
	dockerLabelNodeName      = "com.hashicorp.nomad.node_name"
	dockerLabelNodeID        = "com.hashicorp.nomad.node_id"
)

type Driver struct {
	// eventer is used to handle multiplexing of TaskEvents calls such that an
	// event can be broadcast to all callers
	eventer *eventer.Eventer

	// config contains the runtime configuration for the driver set by the
	// SetConfig RPC
	config *DriverConfig

	// clientConfig contains a driver specific subset of the Nomad client
	// configuration
	clientConfig *base.ClientDriverConfig

	// ctx is the context for the driver.
It is passed to other subsystems to 101 // coordinate shutdown 102 ctx context.Context 103 104 // tasks is the in memory datastore mapping taskIDs to taskHandles 105 tasks *taskStore 106 107 // coordinator is what tracks multiple image pulls against the same docker image 108 coordinator *dockerCoordinator 109 110 // logger will log to the Nomad agent 111 logger hclog.Logger 112 113 // gpuRuntime indicates nvidia-docker runtime availability 114 gpuRuntime bool 115 116 // A tri-state boolean to know if the fingerprinting has happened and 117 // whether it has been successful 118 fingerprintSuccess *bool 119 fingerprintLock sync.RWMutex 120 121 // A boolean to know if the docker driver has ever been correctly detected 122 // for use during fingerprinting. 123 detected bool 124 detectedLock sync.RWMutex 125 126 danglingReconciler *containerReconciler 127 cpusetFixer CpusetFixer 128 } 129 130 // NewDockerDriver returns a docker implementation of a driver plugin 131 func NewDockerDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin { 132 logger = logger.Named(pluginName) 133 return &Driver{ 134 eventer: eventer.NewEventer(ctx, logger), 135 config: &DriverConfig{}, 136 tasks: newTaskStore(), 137 ctx: ctx, 138 logger: logger, 139 } 140 } 141 142 func (d *Driver) reattachToDockerLogger(reattachConfig *pstructs.ReattachConfig) (docklog.DockerLogger, *plugin.Client, error) { 143 reattach, err := pstructs.ReattachConfigToGoPlugin(reattachConfig) 144 if err != nil { 145 return nil, nil, err 146 } 147 148 dlogger, dloggerPluginClient, err := docklog.ReattachDockerLogger(reattach) 149 if err != nil { 150 return nil, nil, fmt.Errorf("failed to reattach to docker logger process: %v", err) 151 } 152 153 return dlogger, dloggerPluginClient, nil 154 } 155 156 func (d *Driver) setupNewDockerLogger(container *docker.Container, cfg *drivers.TaskConfig, startTime time.Time) (docklog.DockerLogger, *plugin.Client, error) { 157 dlogger, pluginClient, err := docklog.LaunchDockerLogger(d.logger) 158 if err != nil { 159 if pluginClient != nil { 160 pluginClient.Kill() 161 } 162 return nil, nil, fmt.Errorf("failed to launch docker logger plugin: %v", err) 163 } 164 165 if err := dlogger.Start(&docklog.StartOpts{ 166 Endpoint: d.config.Endpoint, 167 ContainerID: container.ID, 168 TTY: container.Config.Tty, 169 Stdout: cfg.StdoutPath, 170 Stderr: cfg.StderrPath, 171 TLSCert: d.config.TLS.Cert, 172 TLSKey: d.config.TLS.Key, 173 TLSCA: d.config.TLS.CA, 174 StartTime: startTime.Unix(), 175 }); err != nil { 176 pluginClient.Kill() 177 return nil, nil, fmt.Errorf("failed to launch docker logger process %s: %v", container.ID, err) 178 } 179 180 return dlogger, pluginClient, nil 181 } 182 183 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 184 if _, ok := d.tasks.Get(handle.Config.ID); ok { 185 return nil 186 } 187 188 var handleState taskHandleState 189 if err := handle.GetDriverState(&handleState); err != nil { 190 return fmt.Errorf("failed to decode driver task state: %v", err) 191 } 192 193 client, _, err := d.dockerClients() 194 if err != nil { 195 return fmt.Errorf("failed to get docker client: %v", err) 196 } 197 198 container, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{ 199 ID: handleState.ContainerID, 200 }) 201 if err != nil { 202 return fmt.Errorf("failed to inspect container for id %q: %v", handleState.ContainerID, err) 203 } 204 205 h := &taskHandle{ 206 client: client, 207 waitClient: waitClient, 208 logger: d.logger.With("container_id", container.ID), 
209 task: handle.Config, 210 containerID: container.ID, 211 containerImage: container.Image, 212 doneCh: make(chan bool), 213 waitCh: make(chan struct{}), 214 removeContainerOnExit: d.config.GC.Container, 215 net: handleState.DriverNetwork, 216 } 217 218 if !d.config.DisableLogCollection { 219 h.dlogger, h.dloggerPluginClient, err = d.reattachToDockerLogger(handleState.ReattachConfig) 220 if err != nil { 221 d.logger.Warn("failed to reattach to docker logger process", "error", err) 222 223 h.dlogger, h.dloggerPluginClient, err = d.setupNewDockerLogger(container, handle.Config, time.Now()) 224 if err != nil { 225 if err := client.StopContainer(handleState.ContainerID, 0); err != nil { 226 d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err) 227 } 228 return fmt.Errorf("failed to setup replacement docker logger: %v", err) 229 } 230 231 if err := handle.SetDriverState(h.buildState()); err != nil { 232 if err := client.StopContainer(handleState.ContainerID, 0); err != nil { 233 d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err) 234 } 235 return fmt.Errorf("failed to store driver state: %v", err) 236 } 237 } 238 } 239 240 d.tasks.Set(handle.Config.ID, h) 241 go h.run() 242 243 return nil 244 } 245 246 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 247 if _, ok := d.tasks.Get(cfg.ID); ok { 248 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 249 } 250 251 var driverConfig TaskConfig 252 253 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 254 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 255 } 256 257 if driverConfig.Image == "" { 258 return nil, nil, fmt.Errorf("image name required for docker driver") 259 } 260 261 driverConfig.Image = strings.TrimPrefix(driverConfig.Image, "https://") 262 263 handle := drivers.NewTaskHandle(taskHandleVersion) 264 handle.Config = cfg 265 266 // Initialize docker API clients 267 client, _, err := d.dockerClients() 268 if err != nil { 269 return nil, nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 270 } 271 272 id, err := d.createImage(cfg, &driverConfig, client) 273 if err != nil { 274 return nil, nil, err 275 } 276 277 if runtime.GOOS == "windows" { 278 err = d.convertAllocPathsForWindowsLCOW(cfg, driverConfig.Image) 279 if err != nil { 280 return nil, nil, err 281 } 282 } 283 284 containerCfg, err := d.createContainerConfig(cfg, &driverConfig, driverConfig.Image) 285 if err != nil { 286 d.logger.Error("failed to create container configuration", "image_name", driverConfig.Image, 287 "image_id", id, "error", err) 288 return nil, nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", driverConfig.Image, id, err) 289 } 290 291 startAttempts := 0 292 CREATE: 293 container, err := d.createContainer(client, containerCfg, driverConfig.Image) 294 if err != nil { 295 d.logger.Error("failed to create container", "error", err) 296 client.RemoveContainer(docker.RemoveContainerOptions{ 297 ID: containerCfg.Name, 298 Force: true, 299 }) 300 return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to create container: %v", err), err) 301 } 302 303 d.logger.Info("created container", "container_id", container.ID) 304 305 // We don't need to start the container if the container is already running 306 // since we don't create containers which are already present on the host 307 // and are running 
	if !container.State.Running {
		// Start the container
		if err := d.startContainer(container); err != nil {
			d.logger.Error("failed to start container", "container_id", container.ID, "error", err)
			client.RemoveContainer(docker.RemoveContainerOptions{
				ID:    container.ID,
				Force: true,
			})
			// Some sort of docker race bug; recreating the container usually works
			if strings.Contains(err.Error(), "OCI runtime create failed: container with id exists:") && startAttempts < 5 {
				startAttempts++
				d.logger.Debug("reattempting container create/start sequence", "attempt", startAttempts, "container_id", id)
				goto CREATE
			}
			return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("Failed to start container %s: %s", container.ID, err), err)
		}

		// Inspect the container to get its metadata, since much of it (e.g.
		// networking) isn't populated until the container is started
		runningContainer, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{
			ID: container.ID,
		})
		if err != nil {
			msg := "failed to inspect started container"
			d.logger.Error(msg, "error", err)
			client.RemoveContainer(docker.RemoveContainerOptions{
				ID:    container.ID,
				Force: true,
			})
			return nil, nil, nstructs.NewRecoverableError(fmt.Errorf("%s %s: %s", msg, container.ID, err), true)
		}
		container = runningContainer
		d.logger.Info("started container", "container_id", container.ID)
	} else {
		d.logger.Debug("re-attaching to container", "container_id",
			container.ID, "container_state", container.State.String())
	}

	if !cgutil.UseV2 {
		// This does not apply to cgroups.v2, which only allows setting the PID
		// into exactly 1 group. For cgroups.v2, we use the cpuset fixer to reconcile
		// the cpuset value into the cgroups created by docker in the background.
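		// (Illustrative note: on cgroups.v1 the call below amounts to writing
		// the container's PID into the cpuset cgroup that Nomad manages for
		// the task, e.g. appending the PID to <CpusetCgroupPath>/cgroup.procs;
		// setCPUSetCgroup is assumed to perform that write.)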
354 if containerCfg.HostConfig.CPUSet == "" && cfg.Resources.LinuxResources.CpusetCgroupPath != "" { 355 if err := setCPUSetCgroup(cfg.Resources.LinuxResources.CpusetCgroupPath, container.State.Pid); err != nil { 356 return nil, nil, fmt.Errorf("failed to set the cpuset cgroup for container: %v", err) 357 } 358 } 359 } 360 361 collectingLogs := !d.config.DisableLogCollection 362 363 var dlogger docklog.DockerLogger 364 var pluginClient *plugin.Client 365 366 if collectingLogs { 367 dlogger, pluginClient, err = d.setupNewDockerLogger(container, cfg, time.Unix(0, 0)) 368 if err != nil { 369 d.logger.Error("an error occurred after container startup, terminating container", "container_id", container.ID) 370 client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true}) 371 return nil, nil, err 372 } 373 } 374 375 // Detect container address 376 ip, autoUse := d.detectIP(container, &driverConfig) 377 378 net := &drivers.DriverNetwork{ 379 PortMap: driverConfig.PortMap, 380 IP: ip, 381 AutoAdvertise: autoUse, 382 } 383 384 // Return a driver handle 385 h := &taskHandle{ 386 client: client, 387 waitClient: waitClient, 388 dlogger: dlogger, 389 dloggerPluginClient: pluginClient, 390 logger: d.logger.With("container_id", container.ID), 391 task: cfg, 392 containerID: container.ID, 393 containerImage: container.Image, 394 doneCh: make(chan bool), 395 waitCh: make(chan struct{}), 396 removeContainerOnExit: d.config.GC.Container, 397 net: net, 398 } 399 400 if err := handle.SetDriverState(h.buildState()); err != nil { 401 d.logger.Error("error encoding container occurred after startup, terminating container", "container_id", container.ID, "error", err) 402 if collectingLogs { 403 dlogger.Stop() 404 pluginClient.Kill() 405 } 406 client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true}) 407 return nil, nil, err 408 } 409 410 d.tasks.Set(cfg.ID, h) 411 go h.run() 412 413 return handle, net, nil 414 } 415 416 // createContainerClient is the subset of Docker Client methods used by the 417 // createContainer method to ease testing subtle error conditions. 418 type createContainerClient interface { 419 CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) 420 InspectContainer(id string) (*docker.Container, error) 421 ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error) 422 RemoveContainer(opts docker.RemoveContainerOptions) error 423 } 424 425 // createContainer creates the container given the passed configuration. It 426 // attempts to handle any transient Docker errors. 427 func (d *Driver) createContainer(client createContainerClient, config docker.CreateContainerOptions, 428 image string) (*docker.Container, error) { 429 // Create a container 430 attempted := 0 431 CREATE: 432 container, createErr := client.CreateContainer(config) 433 if createErr == nil { 434 return container, nil 435 } 436 437 d.logger.Debug("failed to create container", "container_name", 438 config.Name, "image_name", image, "image_id", config.Config.Image, 439 "attempt", attempted+1, "error", createErr) 440 441 // Volume management tools like Portworx may not have detached a volume 442 // from a previous node before Nomad started a task replacement task. 443 // Treat these errors as recoverable so we retry. 
444 if strings.Contains(strings.ToLower(createErr.Error()), "volume is attached on another node") { 445 return nil, nstructs.NewRecoverableError(createErr, true) 446 } 447 448 // If the container already exists determine whether it's already 449 // running or if it's dead and needs to be recreated. 450 if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") { 451 452 container, err := d.containerByName(config.Name) 453 if err != nil { 454 return nil, err 455 } 456 457 if container != nil && container.State.Running { 458 return container, nil 459 } 460 461 // Purge conflicting container if found. 462 // If container is nil here, the conflicting container was 463 // deleted in our check here, so retry again. 464 if container != nil { 465 // Delete matching containers 466 err = client.RemoveContainer(docker.RemoveContainerOptions{ 467 ID: container.ID, 468 Force: true, 469 }) 470 if err != nil { 471 d.logger.Error("failed to purge container", "container_id", container.ID) 472 return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err)) 473 } else { 474 d.logger.Info("purged container", "container_id", container.ID) 475 } 476 } 477 478 if attempted < 5 { 479 attempted++ 480 time.Sleep(nextBackoff(attempted)) 481 goto CREATE 482 } 483 } else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") { 484 // There is still a very small chance this is possible even with the 485 // coordinator so retry. 486 return nil, nstructs.NewRecoverableError(createErr, true) 487 } else if isDockerTransientError(createErr) && attempted < 5 { 488 attempted++ 489 time.Sleep(nextBackoff(attempted)) 490 goto CREATE 491 } 492 493 return nil, recoverableErrTimeouts(createErr) 494 } 495 496 // startContainer starts the passed container. It attempts to handle any 497 // transient Docker errors. 498 func (d *Driver) startContainer(c *docker.Container) error { 499 // Start a container 500 attempted := 0 501 START: 502 startErr := client.StartContainer(c.ID, c.HostConfig) 503 if startErr == nil || strings.Contains(startErr.Error(), "Container already running") { 504 return nil 505 } 506 507 d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr) 508 509 if isDockerTransientError(startErr) { 510 if attempted < 5 { 511 attempted++ 512 time.Sleep(nextBackoff(attempted)) 513 goto START 514 } 515 return nstructs.NewRecoverableError(startErr, true) 516 } 517 518 return recoverableErrTimeouts(startErr) 519 } 520 521 // nextBackoff returns appropriate docker backoff durations after attempted attempts. 522 func nextBackoff(attempted int) time.Duration { 523 // attempts in 200ms, 800ms, 3.2s, 12.8s, 51.2s 524 // TODO: add randomization factor and extract to a helper 525 return 1 << (2 * uint64(attempted)) * 50 * time.Millisecond 526 } 527 528 // createImage creates a docker image either by pulling it from a registry or by 529 // loading it from the file system 530 func (d *Driver) createImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (string, error) { 531 image := driverConfig.Image 532 repo, tag := parseDockerImage(image) 533 534 // We're going to check whether the image is already downloaded. If the tag 535 // is "latest", or ForcePull is set, we have to check for a new version every time so we don't 536 // bother to check and cache the id here. We'll download first, then cache. 
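	// (Illustrative: an image such as "redis:7.0" that is already present
	// locally is reused and its reference count incremented, while
	// "redis:latest" or force_pull = true always takes the pull path so a
	// newer version can be picked up.)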
537 if driverConfig.ForcePull { 538 d.logger.Debug("force pulling image instead of inspecting local", "image_ref", dockerImageRef(repo, tag)) 539 } else if tag != "latest" { 540 if dockerImage, _ := client.InspectImage(image); dockerImage != nil { 541 // Image exists so just increment its reference count 542 d.coordinator.IncrementImageReference(dockerImage.ID, image, task.ID) 543 return dockerImage.ID, nil 544 } 545 } 546 547 // Load the image if specified 548 if driverConfig.LoadImage != "" { 549 return d.loadImage(task, driverConfig, client) 550 } 551 552 // Download the image 553 return d.pullImage(task, driverConfig, client, repo, tag) 554 } 555 556 // pullImage creates an image by pulling it from a docker registry 557 func (d *Driver) pullImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client, repo, tag string) (id string, err error) { 558 authOptions, err := d.resolveRegistryAuthentication(driverConfig, repo) 559 if err != nil { 560 if driverConfig.AuthSoftFail { 561 d.logger.Warn("Failed to find docker repo auth", "repo", repo, "error", err) 562 } else { 563 return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err) 564 } 565 } 566 567 if authIsEmpty(authOptions) { 568 d.logger.Debug("did not find docker auth for repo", "repo", repo) 569 } 570 571 d.eventer.EmitEvent(&drivers.TaskEvent{ 572 TaskID: task.ID, 573 AllocID: task.AllocID, 574 TaskName: task.Name, 575 Timestamp: time.Now(), 576 Message: "Downloading image", 577 Annotations: map[string]string{ 578 "image": dockerImageRef(repo, tag), 579 }, 580 }) 581 582 pullDur, err := time.ParseDuration(driverConfig.ImagePullTimeout) 583 if err != nil { 584 return "", fmt.Errorf("Failed to parse image_pull_timeout: %v", err) 585 } 586 587 return d.coordinator.PullImage(driverConfig.Image, authOptions, task.ID, d.emitEventFunc(task), pullDur, d.config.pullActivityTimeoutDuration) 588 } 589 590 func (d *Driver) emitEventFunc(task *drivers.TaskConfig) LogEventFn { 591 return func(msg string, annotations map[string]string) { 592 d.eventer.EmitEvent(&drivers.TaskEvent{ 593 TaskID: task.ID, 594 AllocID: task.AllocID, 595 TaskName: task.Name, 596 Timestamp: time.Now(), 597 Message: msg, 598 Annotations: annotations, 599 }) 600 } 601 } 602 603 // authBackend encapsulates a function that resolves registry credentials. 604 type authBackend func(string) (*docker.AuthConfiguration, error) 605 606 // resolveRegistryAuthentication attempts to retrieve auth credentials for the 607 // repo, trying all authentication-backends possible. 
608 func (d *Driver) resolveRegistryAuthentication(driverConfig *TaskConfig, repo string) (*docker.AuthConfiguration, error) { 609 return firstValidAuth(repo, []authBackend{ 610 authFromTaskConfig(driverConfig), 611 authFromDockerConfig(d.config.Auth.Config), 612 authFromHelper(d.config.Auth.Helper), 613 }) 614 } 615 616 // loadImage creates an image by loading it from the file system 617 func (d *Driver) loadImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (id string, err error) { 618 619 archive := filepath.Join(task.TaskDir().LocalDir, driverConfig.LoadImage) 620 d.logger.Debug("loading image from disk", "archive", archive) 621 622 f, err := os.Open(archive) 623 if err != nil { 624 return "", fmt.Errorf("unable to open image archive: %v", err) 625 } 626 627 if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil { 628 return "", err 629 } 630 f.Close() 631 632 dockerImage, err := client.InspectImage(driverConfig.Image) 633 if err != nil { 634 return "", recoverableErrTimeouts(err) 635 } 636 637 d.coordinator.IncrementImageReference(dockerImage.ID, driverConfig.Image, task.ID) 638 return dockerImage.ID, nil 639 } 640 641 func (d *Driver) convertAllocPathsForWindowsLCOW(task *drivers.TaskConfig, image string) error { 642 imageConfig, err := client.InspectImage(image) 643 if err != nil { 644 return fmt.Errorf("the image does not exist: %v", err) 645 } 646 // LCOW If we are running a Linux Container on Windows, we need to mount it correctly, as c:\ does not exist on unix 647 if imageConfig.OS == "linux" { 648 a := []rune(task.Env[taskenv.AllocDir]) 649 task.Env[taskenv.AllocDir] = strings.ReplaceAll(string(a[2:]), "\\", "/") 650 l := []rune(task.Env[taskenv.TaskLocalDir]) 651 task.Env[taskenv.TaskLocalDir] = strings.ReplaceAll(string(l[2:]), "\\", "/") 652 s := []rune(task.Env[taskenv.SecretsDir]) 653 task.Env[taskenv.SecretsDir] = strings.ReplaceAll(string(s[2:]), "\\", "/") 654 } 655 return nil 656 } 657 658 func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConfig) ([]string, error) { 659 allocDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SharedAllocDir, task.Env[taskenv.AllocDir]) 660 taskLocalBind := fmt.Sprintf("%s:%s", task.TaskDir().LocalDir, task.Env[taskenv.TaskLocalDir]) 661 secretDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SecretsDir, task.Env[taskenv.SecretsDir]) 662 binds := []string{allocDirBind, taskLocalBind, secretDirBind} 663 664 taskLocalBindVolume := driverConfig.VolumeDriver == "" 665 666 if !d.config.Volumes.Enabled && !taskLocalBindVolume { 667 return nil, fmt.Errorf("volumes are not enabled; cannot use volume driver %q", driverConfig.VolumeDriver) 668 } 669 670 for _, userbind := range driverConfig.Volumes { 671 // This assumes host OS = docker container OS. 
672 // Not true, when we support Linux containers on Windows 673 src, dst, mode, err := parseVolumeSpec(userbind, runtime.GOOS) 674 if err != nil { 675 return nil, fmt.Errorf("invalid docker volume %q: %v", userbind, err) 676 } 677 678 // Paths inside task dir are always allowed when using the default driver, 679 // Relative paths are always allowed as they mount within a container 680 // When a VolumeDriver is set, we assume we receive a binding in the format 681 // volume-name:container-dest 682 // Otherwise, we assume we receive a relative path binding in the format 683 // relative/to/task:/also/in/container 684 if taskLocalBindVolume { 685 src = expandPath(task.TaskDir().Dir, src) 686 } else { 687 // Resolve dotted path segments 688 src = filepath.Clean(src) 689 } 690 691 if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, src) { 692 return nil, fmt.Errorf("volumes are not enabled; cannot mount host paths: %+q", userbind) 693 } 694 695 bind := src + ":" + dst 696 if mode != "" { 697 bind += ":" + mode 698 } 699 binds = append(binds, bind) 700 } 701 702 if selinuxLabel := d.config.Volumes.SelinuxLabel; selinuxLabel != "" { 703 // Apply SELinux Label to each volume 704 for i := range binds { 705 binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel) 706 } 707 } 708 709 return binds, nil 710 } 711 712 var userMountToUnixMount = map[string]string{ 713 // Empty string maps to `rprivate` for backwards compatibility in restored 714 // older tasks, where mount propagation will not be present. 715 "": "rprivate", 716 nstructs.VolumeMountPropagationPrivate: "rprivate", 717 nstructs.VolumeMountPropagationHostToTask: "rslave", 718 nstructs.VolumeMountPropagationBidirectional: "rshared", 719 } 720 721 // takes a local seccomp daemon, reads the file contents for sending to the daemon 722 // this code modified slightly from the docker CLI code 723 // https://github.com/docker/cli/blob/8ef8547eb6934b28497d309d21e280bcd25145f5/cli/command/container/opts.go#L840 724 func parseSecurityOpts(securityOpts []string) ([]string, error) { 725 for key, opt := range securityOpts { 726 con := strings.SplitN(opt, "=", 2) 727 if len(con) == 1 && con[0] != "no-new-privileges" { 728 if strings.Contains(opt, ":") { 729 con = strings.SplitN(opt, ":", 2) 730 } else { 731 return securityOpts, fmt.Errorf("invalid security_opt: %q", opt) 732 } 733 } 734 if con[0] == "seccomp" && con[1] != "unconfined" { 735 f, err := ioutil.ReadFile(con[1]) 736 if err != nil { 737 return securityOpts, fmt.Errorf("opening seccomp profile (%s) failed: %v", con[1], err) 738 } 739 b := bytes.NewBuffer(nil) 740 if err := json.Compact(b, f); err != nil { 741 return securityOpts, fmt.Errorf("compacting json for seccomp profile (%s) failed: %v", con[1], err) 742 } 743 securityOpts[key] = fmt.Sprintf("seccomp=%s", b.Bytes()) 744 } 745 } 746 747 return securityOpts, nil 748 } 749 750 // memoryLimits computes the memory and memory_reservation values passed along to 751 // the docker host config. These fields represent hard and soft/reserved memory 752 // limits from docker's perspective, respectively. 753 // 754 // The memory field on the task configuration can be interpreted as a hard or soft 755 // limit. Before Nomad v0.11.3, it was always a hard limit. Now, it is interpreted 756 // as a soft limit if the memory_hard_limit value is configured on the docker 757 // task driver configuration. 
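// (Worked example, illustrative: a task with memory = 512 and
// memory_hard_limit = 1024 is passed to docker as Memory = 1024 MiB (hard)
// and MemoryReservation = 512 MiB (soft); without memory_hard_limit it is
// passed as Memory = 512 MiB with no reservation, as described below.)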
When memory_hard_limit is set, the docker host 758 // config is configured such that the memory field is equal to memory_hard_limit 759 // value, and the memory_reservation field is set to the task driver memory value. 760 // 761 // If memory_hard_limit is not set (i.e. zero value), then the memory field of 762 // the task resource config is interpreted as a hard limit. In this case both the 763 // memory is set to the task resource memory value and memory_reservation is left 764 // unset. 765 // 766 // Returns (memory (hard), memory_reservation (soft)) values in bytes. 767 func memoryLimits(driverHardLimitMB int64, taskMemory drivers.MemoryResources) (memory, reserve int64) { 768 softBytes := taskMemory.MemoryMB * 1024 * 1024 769 770 hard := driverHardLimitMB 771 if taskMemory.MemoryMaxMB > hard { 772 hard = taskMemory.MemoryMaxMB 773 } 774 775 if hard <= 0 { 776 return softBytes, 0 777 } 778 return hard * 1024 * 1024, softBytes 779 } 780 781 // Extract the cgroup parent from the nomad cgroup (only for linux/v2) 782 func cgroupParent(resources *drivers.Resources) string { 783 var parent string 784 if cgutil.UseV2 && resources != nil && resources.LinuxResources != nil { 785 parent, _ = cgutil.SplitPath(resources.LinuxResources.CpusetCgroupPath) 786 } 787 return parent 788 } 789 790 func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig, 791 imageID string) (docker.CreateContainerOptions, error) { 792 793 // ensure that PortMap variables are populated early on 794 task.Env = taskenv.SetPortMapEnvs(task.Env, driverConfig.PortMap) 795 796 logger := d.logger.With("task_name", task.Name) 797 var c docker.CreateContainerOptions 798 if task.Resources == nil { 799 // Guard against missing resources. We should never have been able to 800 // schedule a job without specifying this. 801 logger.Error("task.Resources is empty") 802 return c, fmt.Errorf("task.Resources is empty") 803 } 804 binds, err := d.containerBinds(task, driverConfig) 805 if err != nil { 806 return c, err 807 } 808 logger.Trace("binding volumes", "volumes", binds) 809 810 // create the config block that will later be consumed by go-dockerclient 811 config := &docker.Config{ 812 Image: imageID, 813 Entrypoint: driverConfig.Entrypoint, 814 Hostname: driverConfig.Hostname, 815 User: task.User, 816 Tty: driverConfig.TTY, 817 OpenStdin: driverConfig.Interactive, 818 } 819 820 if driverConfig.WorkDir != "" { 821 config.WorkingDir = driverConfig.WorkDir 822 } 823 824 containerRuntime := driverConfig.Runtime 825 if _, ok := task.DeviceEnv[nvidiaVisibleDevices]; ok { 826 if !d.gpuRuntime { 827 return c, fmt.Errorf("requested docker runtime %q was not found", d.config.GPURuntimeName) 828 } 829 if containerRuntime != "" && containerRuntime != d.config.GPURuntimeName { 830 return c, fmt.Errorf("conflicting runtime requests: gpu runtime %q conflicts with task runtime %q", d.config.GPURuntimeName, containerRuntime) 831 } 832 containerRuntime = d.config.GPURuntimeName 833 } 834 if _, ok := d.config.allowRuntimes[containerRuntime]; !ok && containerRuntime != "" { 835 return c, fmt.Errorf("requested runtime %q is not allowed", containerRuntime) 836 } 837 838 memory, memoryReservation := memoryLimits(driverConfig.MemoryHardLimit, task.Resources.NomadResources.Memory) 839 840 var pidsLimit int64 841 842 // Pids limit defined in Nomad plugin config. Defaults to 0 (Unlimited). 
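	// (Illustrative: a plugin pids_limit of 100 combined with a task
	// pids_limit of 50 runs the container with a limit of 50, while a task
	// requesting 200 is rejected below because it exceeds the plugin limit.)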
843 if d.config.PidsLimit > 0 { 844 pidsLimit = d.config.PidsLimit 845 } 846 847 // Override Nomad plugin config pids limit, by user defined pids limit. 848 if driverConfig.PidsLimit > 0 { 849 if d.config.PidsLimit > 0 && driverConfig.PidsLimit > d.config.PidsLimit { 850 return c, fmt.Errorf("pids_limit cannot be greater than nomad plugin config pids_limit: %d", d.config.PidsLimit) 851 } 852 pidsLimit = driverConfig.PidsLimit 853 } 854 855 hostConfig := &docker.HostConfig{ 856 CgroupParent: cgroupParent(task.Resources), // if applicable 857 858 Memory: memory, // hard limit 859 MemoryReservation: memoryReservation, // soft limit 860 861 CPUShares: task.Resources.LinuxResources.CPUShares, 862 863 // Binds are used to mount a host volume into the container. We mount a 864 // local directory for storage and a shared alloc directory that can be 865 // used to share data between different tasks in the same task group. 866 Binds: binds, 867 868 StorageOpt: driverConfig.StorageOpt, 869 VolumeDriver: driverConfig.VolumeDriver, 870 871 PidsLimit: &pidsLimit, 872 873 Runtime: containerRuntime, 874 } 875 876 // This translates to docker create/run --cpuset-cpus option. 877 // --cpuset-cpus limit the specific CPUs or cores a container can use. 878 // Nomad natively manages cpusets, setting this option will override 879 // Nomad managed cpusets. 880 if driverConfig.CPUSetCPUs != "" { 881 hostConfig.CPUSetCPUs = driverConfig.CPUSetCPUs 882 } 883 884 // Enable tini (docker-init) init system. 885 if driverConfig.Init { 886 hostConfig.Init = driverConfig.Init 887 } 888 889 // Calculate CPU Quota 890 // cfs_quota_us is the time per core, so we must 891 // multiply the time by the number of cores available 892 // See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu 893 if driverConfig.CPUHardLimit { 894 numCores := runtime.NumCPU() 895 if driverConfig.CPUCFSPeriod < 0 || driverConfig.CPUCFSPeriod > 1000000 { 896 return c, fmt.Errorf("invalid value for cpu_cfs_period") 897 } 898 if driverConfig.CPUCFSPeriod == 0 { 899 driverConfig.CPUCFSPeriod = task.Resources.LinuxResources.CPUPeriod 900 } 901 hostConfig.CPUPeriod = driverConfig.CPUCFSPeriod 902 hostConfig.CPUQuota = int64(task.Resources.LinuxResources.PercentTicks*float64(driverConfig.CPUCFSPeriod)) * int64(numCores) 903 } 904 905 // Windows does not support MemorySwap/MemorySwappiness #2193 906 if runtime.GOOS == "windows" { 907 hostConfig.MemorySwap = 0 908 hostConfig.MemorySwappiness = nil 909 } else { 910 hostConfig.MemorySwap = memory 911 912 // disable swap explicitly in non-Windows environments 913 var swapiness int64 = 0 914 hostConfig.MemorySwappiness = &swapiness 915 916 } 917 918 loggingDriver := driverConfig.Logging.Type 919 if loggingDriver == "" { 920 loggingDriver = driverConfig.Logging.Driver 921 } 922 923 hostConfig.LogConfig = docker.LogConfig{ 924 Type: loggingDriver, 925 Config: driverConfig.Logging.Config, 926 } 927 928 if hostConfig.LogConfig.Type == "" && hostConfig.LogConfig.Config == nil { 929 logger.Trace("no docker log driver provided, defaulting to plugin config") 930 hostConfig.LogConfig.Type = d.config.Logging.Type 931 hostConfig.LogConfig.Config = d.config.Logging.Config 932 } 933 934 logger.Debug("configured resources", 935 "memory", hostConfig.Memory, "memory_reservation", hostConfig.MemoryReservation, 936 "cpu_shares", hostConfig.CPUShares, "cpu_quota", hostConfig.CPUQuota, 937 "cpu_period", hostConfig.CPUPeriod) 938 939 logger.Debug("binding directories", 
"binds", hclog.Fmt("%#v", hostConfig.Binds)) 940 941 // set privileged mode 942 if driverConfig.Privileged && !d.config.AllowPrivileged { 943 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`) 944 } 945 hostConfig.Privileged = driverConfig.Privileged 946 947 // set add/drop capabilities 948 if hostConfig.CapAdd, hostConfig.CapDrop, err = capabilities.Delta( 949 capabilities.DockerDefaults(), d.config.AllowCaps, driverConfig.CapAdd, driverConfig.CapDrop, 950 ); err != nil { 951 return c, err 952 } 953 954 // set SHM size 955 if driverConfig.ShmSize != 0 { 956 hostConfig.ShmSize = driverConfig.ShmSize 957 } 958 959 // Setup devices 960 for _, device := range driverConfig.Devices { 961 dd, err := device.toDockerDevice() 962 if err != nil { 963 return c, err 964 } 965 hostConfig.Devices = append(hostConfig.Devices, dd) 966 } 967 for _, device := range task.Devices { 968 hostConfig.Devices = append(hostConfig.Devices, docker.Device{ 969 PathOnHost: device.HostPath, 970 PathInContainer: device.TaskPath, 971 CgroupPermissions: device.Permissions, 972 }) 973 } 974 975 // Setup mounts 976 for _, m := range driverConfig.Mounts { 977 hm, err := d.toDockerMount(&m, task) 978 if err != nil { 979 return c, err 980 } 981 hostConfig.Mounts = append(hostConfig.Mounts, *hm) 982 } 983 for _, m := range driverConfig.MountsList { 984 hm, err := d.toDockerMount(&m, task) 985 if err != nil { 986 return c, err 987 } 988 hostConfig.Mounts = append(hostConfig.Mounts, *hm) 989 } 990 991 // Setup /etc/hosts 992 // If the task's network_mode is unset our hostname and IP will come from 993 // the Nomad-owned network (if in use), so we need to generate an 994 // /etc/hosts file that matches the network rather than the default one 995 // that comes from the pause container 996 if task.NetworkIsolation != nil && driverConfig.NetworkMode == "" { 997 etcHostMount, err := hostnames.GenerateEtcHostsMount( 998 task.AllocDir, task.NetworkIsolation, driverConfig.ExtraHosts) 999 if err != nil { 1000 return c, fmt.Errorf("failed to build mount for /etc/hosts: %v", err) 1001 } 1002 if etcHostMount != nil { 1003 // erase the extra_hosts field if we have a mount so we don't get 1004 // conflicting options error from dockerd 1005 driverConfig.ExtraHosts = nil 1006 hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{ 1007 Target: etcHostMount.TaskPath, 1008 Source: etcHostMount.HostPath, 1009 Type: "bind", 1010 ReadOnly: etcHostMount.Readonly, 1011 BindOptions: &docker.BindOptions{ 1012 Propagation: etcHostMount.PropagationMode, 1013 }, 1014 }) 1015 } 1016 } 1017 1018 // Setup DNS 1019 // If task DNS options are configured Nomad will manage the resolv.conf file 1020 // Docker driver dns options are not compatible with task dns options 1021 if task.DNS != nil { 1022 dnsMount, err := resolvconf.GenerateDNSMount(task.TaskDir().Dir, task.DNS) 1023 if err != nil { 1024 return c, fmt.Errorf("failed to build mount for resolv.conf: %v", err) 1025 } 1026 hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{ 1027 Target: dnsMount.TaskPath, 1028 Source: dnsMount.HostPath, 1029 Type: "bind", 1030 ReadOnly: dnsMount.Readonly, 1031 BindOptions: &docker.BindOptions{ 1032 Propagation: dnsMount.PropagationMode, 1033 }, 1034 }) 1035 } else { 1036 if len(driverConfig.DNSSearchDomains) > 0 { 1037 hostConfig.DNSSearch = driverConfig.DNSSearchDomains 1038 } 1039 if len(driverConfig.DNSOptions) > 0 { 1040 hostConfig.DNSOptions = driverConfig.DNSOptions 1041 } 1042 // set DNS servers 1043 for _, ip := range 
driverConfig.DNSServers { 1044 if net.ParseIP(ip) != nil { 1045 hostConfig.DNS = append(hostConfig.DNS, ip) 1046 } else { 1047 logger.Error("invalid ip address for container dns server", "ip", ip) 1048 } 1049 } 1050 } 1051 1052 for _, m := range task.Mounts { 1053 hm := docker.HostMount{ 1054 Type: "bind", 1055 Target: m.TaskPath, 1056 Source: m.HostPath, 1057 ReadOnly: m.Readonly, 1058 } 1059 1060 // MountPropagation is only supported by Docker on Linux: 1061 // https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation 1062 if runtime.GOOS == "linux" { 1063 hm.BindOptions = &docker.BindOptions{ 1064 Propagation: userMountToUnixMount[m.PropagationMode], 1065 } 1066 } 1067 1068 hostConfig.Mounts = append(hostConfig.Mounts, hm) 1069 } 1070 1071 hostConfig.ExtraHosts = driverConfig.ExtraHosts 1072 1073 hostConfig.IpcMode = driverConfig.IPCMode 1074 hostConfig.PidMode = driverConfig.PidMode 1075 hostConfig.UTSMode = driverConfig.UTSMode 1076 hostConfig.UsernsMode = driverConfig.UsernsMode 1077 hostConfig.SecurityOpt = driverConfig.SecurityOpt 1078 hostConfig.Sysctls = driverConfig.Sysctl 1079 1080 hostConfig.SecurityOpt, err = parseSecurityOpts(driverConfig.SecurityOpt) 1081 if err != nil { 1082 return c, fmt.Errorf("failed to parse security_opt configuration: %v", err) 1083 } 1084 1085 ulimits, err := sliceMergeUlimit(driverConfig.Ulimit) 1086 if err != nil { 1087 return c, fmt.Errorf("failed to parse ulimit configuration: %v", err) 1088 } 1089 hostConfig.Ulimits = ulimits 1090 1091 hostConfig.ReadonlyRootfs = driverConfig.ReadonlyRootfs 1092 1093 // set the docker network mode 1094 hostConfig.NetworkMode = driverConfig.NetworkMode 1095 1096 // if the driver config does not specify a network mode then try to use the 1097 // shared alloc network 1098 if hostConfig.NetworkMode == "" { 1099 if task.NetworkIsolation != nil && task.NetworkIsolation.Path != "" { 1100 // find the previously created parent container to join networks with 1101 netMode := fmt.Sprintf("container:%s", task.NetworkIsolation.Labels[dockerNetSpecLabelKey]) 1102 logger.Debug("configuring network mode for task group", "network_mode", netMode) 1103 hostConfig.NetworkMode = netMode 1104 } else { 1105 // docker default 1106 logger.Debug("networking mode not specified; using default") 1107 hostConfig.NetworkMode = "default" 1108 } 1109 } 1110 1111 // Setup port mapping and exposed ports 1112 ports := newPublishedPorts(logger) 1113 switch { 1114 case task.Resources.Ports != nil && len(driverConfig.Ports) > 0: 1115 // Do not set up docker port mapping if shared alloc networking is used 1116 if strings.HasPrefix(hostConfig.NetworkMode, "container:") { 1117 break 1118 } 1119 1120 for _, port := range driverConfig.Ports { 1121 if mapping, ok := task.Resources.Ports.Get(port); ok { 1122 ports.add(mapping.Label, mapping.HostIP, mapping.Value, mapping.To) 1123 } else { 1124 return c, fmt.Errorf("Port %q not found, check network stanza", port) 1125 } 1126 } 1127 case len(task.Resources.NomadResources.Networks) > 0: 1128 network := task.Resources.NomadResources.Networks[0] 1129 1130 for _, port := range network.ReservedPorts { 1131 ports.addMapped(port.Label, network.IP, port.Value, driverConfig.PortMap) 1132 } 1133 1134 for _, port := range network.DynamicPorts { 1135 ports.addMapped(port.Label, network.IP, port.Value, driverConfig.PortMap) 1136 } 1137 1138 default: 1139 if len(driverConfig.PortMap) > 0 { 1140 if task.Resources.Ports != nil { 1141 return c, fmt.Errorf("'port_map' cannot map group network ports, use 'ports' 
instead") 1142 } 1143 return c, fmt.Errorf("Trying to map ports but no network interface is available") 1144 } 1145 } 1146 hostConfig.PortBindings = ports.publishedPorts 1147 config.ExposedPorts = ports.exposedPorts 1148 1149 // If the user specified a custom command to run, we'll inject it here. 1150 if driverConfig.Command != "" { 1151 // Validate command 1152 if err := validateCommand(driverConfig.Command, "args"); err != nil { 1153 return c, err 1154 } 1155 1156 cmd := []string{driverConfig.Command} 1157 if len(driverConfig.Args) != 0 { 1158 cmd = append(cmd, driverConfig.Args...) 1159 } 1160 logger.Debug("setting container startup command", "command", strings.Join(cmd, " ")) 1161 config.Cmd = cmd 1162 } else if len(driverConfig.Args) != 0 { 1163 config.Cmd = driverConfig.Args 1164 } 1165 1166 if len(driverConfig.Labels) > 0 { 1167 config.Labels = driverConfig.Labels 1168 } 1169 1170 labels := make(map[string]string, len(driverConfig.Labels)+1) 1171 for k, v := range driverConfig.Labels { 1172 labels[k] = v 1173 } 1174 // main mandatory label 1175 labels[dockerLabelAllocID] = task.AllocID 1176 1177 //optional labels, as configured in plugin configuration 1178 for _, configurationExtraLabel := range d.config.ExtraLabels { 1179 if glob.Glob(configurationExtraLabel, "job_name") { 1180 labels[dockerLabelJobName] = task.JobName 1181 } 1182 if glob.Glob(configurationExtraLabel, "job_id") { 1183 labels[dockerLabelJobID] = task.JobID 1184 } 1185 if glob.Glob(configurationExtraLabel, "task_group_name") { 1186 labels[dockerLabelTaskGroupName] = task.TaskGroupName 1187 } 1188 if glob.Glob(configurationExtraLabel, "task_name") { 1189 labels[dockerLabelTaskName] = task.Name 1190 } 1191 if glob.Glob(configurationExtraLabel, "namespace") { 1192 labels[dockerLabelNamespace] = task.Namespace 1193 } 1194 if glob.Glob(configurationExtraLabel, "node_name") { 1195 labels[dockerLabelNodeName] = task.NodeName 1196 } 1197 if glob.Glob(configurationExtraLabel, "node_id") { 1198 labels[dockerLabelNodeID] = task.NodeID 1199 } 1200 } 1201 1202 config.Labels = labels 1203 logger.Debug("applied labels on the container", "labels", config.Labels) 1204 1205 config.Env = task.EnvList() 1206 1207 containerName := fmt.Sprintf("%s-%s", strings.ReplaceAll(task.Name, "/", "_"), task.AllocID) 1208 logger.Debug("setting container name", "container_name", containerName) 1209 1210 var networkingConfig *docker.NetworkingConfig 1211 if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { 1212 networkingConfig = &docker.NetworkingConfig{ 1213 EndpointsConfig: map[string]*docker.EndpointConfig{ 1214 hostConfig.NetworkMode: {}, 1215 }, 1216 } 1217 } 1218 1219 if len(driverConfig.NetworkAliases) > 0 { 1220 networkingConfig.EndpointsConfig[hostConfig.NetworkMode].Aliases = driverConfig.NetworkAliases 1221 logger.Debug("setting container network aliases", "network_mode", hostConfig.NetworkMode, 1222 "network_aliases", strings.Join(driverConfig.NetworkAliases, ", ")) 1223 } 1224 1225 if driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { 1226 networkingConfig.EndpointsConfig[hostConfig.NetworkMode].IPAMConfig = &docker.EndpointIPAMConfig{ 1227 IPv4Address: driverConfig.IPv4Address, 1228 IPv6Address: driverConfig.IPv6Address, 1229 } 1230 logger.Debug("setting container network configuration", "network_mode", hostConfig.NetworkMode, 1231 "ipv4_address", driverConfig.IPv4Address, "ipv6_address", driverConfig.IPv6Address) 1232 } 1233 1234 if driverConfig.MacAddress != "" { 
1235 config.MacAddress = driverConfig.MacAddress 1236 logger.Debug("setting container mac address", "mac_address", config.MacAddress) 1237 } 1238 1239 if driverConfig.Healthchecks.Disabled() { 1240 // Override any image-supplied health-check with disable sentinel. 1241 // https://github.com/docker/engine-api/blob/master/types/container/config.go#L16 1242 config.Healthcheck = &docker.HealthConfig{Test: []string{"NONE"}} 1243 logger.Debug("setting container healthchecks to be disabled") 1244 } 1245 1246 return docker.CreateContainerOptions{ 1247 Name: containerName, 1248 Config: config, 1249 HostConfig: hostConfig, 1250 NetworkingConfig: networkingConfig, 1251 }, nil 1252 } 1253 1254 func (d *Driver) toDockerMount(m *DockerMount, task *drivers.TaskConfig) (*docker.HostMount, error) { 1255 hm, err := m.toDockerHostMount() 1256 if err != nil { 1257 return nil, err 1258 } 1259 1260 switch hm.Type { 1261 case "bind": 1262 hm.Source = expandPath(task.TaskDir().Dir, hm.Source) 1263 1264 // paths inside alloc dir are always allowed as they mount within 1265 // a container, and treated as relative to task dir 1266 if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, hm.Source) { 1267 return nil, fmt.Errorf( 1268 "volumes are not enabled; cannot mount host path: %q %q", 1269 hm.Source, task.AllocDir) 1270 } 1271 case "tmpfs": 1272 // no source, so no sandbox check required 1273 default: // "volume", but also any new thing that comes along 1274 if !d.config.Volumes.Enabled { 1275 return nil, fmt.Errorf( 1276 "volumes are not enabled; cannot mount volume: %q", hm.Source) 1277 } 1278 } 1279 1280 return &hm, nil 1281 } 1282 1283 // detectIP of Docker container. Returns the first IP found as well as true if 1284 // the IP should be advertised (bridge network IPs return false). Returns an 1285 // empty string and false if no IP could be found. 1286 func (d *Driver) detectIP(c *docker.Container, driverConfig *TaskConfig) (string, bool) { 1287 if c.NetworkSettings == nil { 1288 // This should only happen if there's been a coding error (such 1289 // as not calling InspectContainer after CreateContainer). Code 1290 // defensively in case the Docker API changes subtly. 1291 d.logger.Error("no network settings for container", "container_id", c.ID) 1292 return "", false 1293 } 1294 1295 ip, ipName := "", "" 1296 auto := false 1297 for name, net := range c.NetworkSettings.Networks { 1298 if net.IPAddress == "" { 1299 // Ignore networks without an IP address 1300 continue 1301 } 1302 1303 ip = net.IPAddress 1304 if driverConfig.AdvertiseIPv6Addr { 1305 ip = net.GlobalIPv6Address 1306 auto = true 1307 } 1308 ipName = name 1309 1310 // Don't auto-advertise IPs for default networks (bridge on 1311 // Linux, nat on Windows) 1312 if name != "bridge" && name != "nat" { 1313 auto = true 1314 } 1315 1316 break 1317 } 1318 1319 if n := len(c.NetworkSettings.Networks); n > 1 { 1320 d.logger.Warn("multiple Docker networks for container found but Nomad only supports 1", 1321 "total_networks", n, 1322 "container_id", c.ID, 1323 "container_network", ipName) 1324 } 1325 1326 return ip, auto 1327 } 1328 1329 // containerByName finds a running container by name, and returns an error 1330 // if the container is dead or can't be found. 
func (d *Driver) containerByName(name string) (*docker.Container, error) {

	client, _, err := d.dockerClients()
	if err != nil {
		return nil, err
	}
	containers, err := client.ListContainers(docker.ListContainersOptions{
		All: true,
	})
	if err != nil {
		d.logger.Error("failed to query list of containers matching name",
			"container_name", name)
		return nil, recoverableErrTimeouts(
			fmt.Errorf("Failed to query list of containers: %s", err))
	}

	// Docker returns container names with a "/" prepended to the Nomad
	// generated name, so match against that form.
	containerName := "/" + name
	var (
		shimContainer docker.APIContainers
		found         bool
	)
OUTER:
	for _, shimContainer = range containers {
		d.logger.Trace("listed container", "names", hclog.Fmt("%+v", shimContainer.Names))
		for _, name := range shimContainer.Names {
			if name == containerName {
				d.logger.Trace("Found container",
					"container_name", containerName, "container_id", shimContainer.ID)
				found = true
				break OUTER
			}
		}
	}
	if !found {
		return nil, nil
	}

	container, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{
		ID: shimContainer.ID,
	})
	if err != nil {
		err = fmt.Errorf("Failed to inspect container %s: %s", shimContainer.ID, err)

		// This error is always recoverable as it could
		// be caused by races between listing
		// containers and this container being removed.
		// See #2802
		return nil, nstructs.NewRecoverableError(err, true)
	}
	return container, nil
}

// validateCommand validates that the command only has a single value and
// returns a user friendly error message telling them to use the passed
// argField.
1387 func validateCommand(command, argField string) error { 1388 trimmed := strings.TrimSpace(command) 1389 if len(trimmed) == 0 { 1390 return fmt.Errorf("command empty: %q", command) 1391 } 1392 1393 if len(trimmed) != len(command) { 1394 return fmt.Errorf("command contains extra white space: %q", command) 1395 } 1396 1397 return nil 1398 } 1399 1400 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 1401 h, ok := d.tasks.Get(taskID) 1402 if !ok { 1403 return nil, drivers.ErrTaskNotFound 1404 } 1405 ch := make(chan *drivers.ExitResult) 1406 go d.handleWait(ctx, ch, h) 1407 return ch, nil 1408 } 1409 1410 func (d *Driver) handleWait(ctx context.Context, ch chan *drivers.ExitResult, h *taskHandle) { 1411 defer close(ch) 1412 select { 1413 case <-h.waitCh: 1414 ch <- h.ExitResult() 1415 case <-ctx.Done(): 1416 ch <- &drivers.ExitResult{ 1417 Err: ctx.Err(), 1418 } 1419 } 1420 } 1421 1422 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 1423 h, ok := d.tasks.Get(taskID) 1424 if !ok { 1425 return drivers.ErrTaskNotFound 1426 } 1427 1428 return h.Kill(timeout, signal) 1429 } 1430 1431 func (d *Driver) DestroyTask(taskID string, force bool) error { 1432 h, ok := d.tasks.Get(taskID) 1433 if !ok { 1434 return drivers.ErrTaskNotFound 1435 } 1436 1437 c, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{ 1438 ID: h.containerID, 1439 }) 1440 if err != nil { 1441 switch err.(type) { 1442 case *docker.NoSuchContainer: 1443 h.logger.Info("container was removed out of band, will proceed with DestroyTask", 1444 "error", err) 1445 default: 1446 return fmt.Errorf("failed to inspect container state: %v", err) 1447 } 1448 } else { 1449 if c.State.Running { 1450 if !force { 1451 return fmt.Errorf("must call StopTask for the given task before Destroy or set force to true") 1452 } 1453 if err := h.client.StopContainer(h.containerID, 0); err != nil { 1454 h.logger.Warn("failed to stop container during destroy", "error", err) 1455 } 1456 } 1457 1458 if h.removeContainerOnExit { 1459 if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil { 1460 h.logger.Error("error removing container", "error", err) 1461 } 1462 } else { 1463 h.logger.Debug("not removing container due to config") 1464 } 1465 } 1466 1467 if err := d.cleanupImage(h); err != nil { 1468 h.logger.Error("failed to cleanup image after destroying container", 1469 "error", err) 1470 } 1471 1472 d.tasks.Delete(taskID) 1473 return nil 1474 } 1475 1476 // cleanupImage removes a Docker image. No error is returned if the image 1477 // doesn't exist or is still in use. Requires the global client to already be 1478 // initialized. 
1479 func (d *Driver) cleanupImage(handle *taskHandle) error { 1480 if !d.config.GC.Image { 1481 return nil 1482 } 1483 1484 d.coordinator.RemoveImage(handle.containerImage, handle.task.ID) 1485 1486 return nil 1487 } 1488 1489 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 1490 h, ok := d.tasks.Get(taskID) 1491 if !ok { 1492 return nil, drivers.ErrTaskNotFound 1493 } 1494 1495 container, err := client.InspectContainerWithOptions(docker.InspectContainerOptions{ 1496 ID: h.containerID, 1497 }) 1498 if err != nil { 1499 return nil, fmt.Errorf("failed to inspect container %q: %v", h.containerID, err) 1500 } 1501 status := &drivers.TaskStatus{ 1502 ID: h.task.ID, 1503 Name: h.task.Name, 1504 StartedAt: container.State.StartedAt, 1505 CompletedAt: container.State.FinishedAt, 1506 DriverAttributes: map[string]string{ 1507 "container_id": container.ID, 1508 }, 1509 NetworkOverride: h.net, 1510 ExitResult: h.ExitResult(), 1511 } 1512 1513 status.State = drivers.TaskStateUnknown 1514 if container.State.Running { 1515 status.State = drivers.TaskStateRunning 1516 } 1517 if container.State.Dead { 1518 status.State = drivers.TaskStateExited 1519 } 1520 1521 return status, nil 1522 } 1523 1524 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 1525 h, ok := d.tasks.Get(taskID) 1526 if !ok { 1527 return nil, drivers.ErrTaskNotFound 1528 } 1529 1530 return h.Stats(ctx, interval) 1531 } 1532 1533 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 1534 return d.eventer.TaskEvents(ctx) 1535 } 1536 1537 func (d *Driver) SignalTask(taskID string, signal string) error { 1538 h, ok := d.tasks.Get(taskID) 1539 if !ok { 1540 return drivers.ErrTaskNotFound 1541 } 1542 1543 sig, err := signals.Parse(signal) 1544 if err != nil { 1545 return fmt.Errorf("failed to parse signal: %v", err) 1546 } 1547 1548 // TODO: review whether we can timeout in this and other Docker API 1549 // calls without breaking the expected client behavior. 
1550 // see https://github.com/hashicorp/nomad/issues/9503 1551 return h.Signal(context.Background(), sig) 1552 } 1553 1554 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 1555 h, ok := d.tasks.Get(taskID) 1556 if !ok { 1557 return nil, drivers.ErrTaskNotFound 1558 } 1559 1560 if len(cmd) == 0 { 1561 return nil, fmt.Errorf("cmd is required, but was empty") 1562 } 1563 1564 ctx, cancel := context.WithTimeout(context.Background(), timeout) 1565 defer cancel() 1566 1567 return h.Exec(ctx, cmd[0], cmd[1:]) 1568 } 1569 1570 var _ drivers.ExecTaskStreamingDriver = (*Driver)(nil) 1571 1572 func (d *Driver) ExecTaskStreaming(ctx context.Context, taskID string, opts *drivers.ExecOptions) (*drivers.ExitResult, error) { 1573 defer opts.Stdout.Close() 1574 defer opts.Stderr.Close() 1575 1576 done := make(chan interface{}) 1577 defer close(done) 1578 1579 h, ok := d.tasks.Get(taskID) 1580 if !ok { 1581 return nil, drivers.ErrTaskNotFound 1582 } 1583 1584 if len(opts.Command) == 0 { 1585 return nil, fmt.Errorf("command is required but was empty") 1586 } 1587 1588 createExecOpts := docker.CreateExecOptions{ 1589 AttachStdin: true, 1590 AttachStdout: true, 1591 AttachStderr: true, 1592 Tty: opts.Tty, 1593 Cmd: opts.Command, 1594 Container: h.containerID, 1595 Context: ctx, 1596 } 1597 exec, err := h.client.CreateExec(createExecOpts) 1598 if err != nil { 1599 return nil, fmt.Errorf("failed to create exec object: %v", err) 1600 } 1601 1602 go func() { 1603 for { 1604 select { 1605 case <-ctx.Done(): 1606 return 1607 case <-done: 1608 return 1609 case s, ok := <-opts.ResizeCh: 1610 if !ok { 1611 return 1612 } 1613 client.ResizeExecTTY(exec.ID, s.Height, s.Width) 1614 } 1615 } 1616 }() 1617 1618 startOpts := docker.StartExecOptions{ 1619 Detach: false, 1620 1621 // When running in TTY, we must use a raw terminal. 1622 // If not, we set RawTerminal to false to allow docker client 1623 // to interpret special stdout/stderr messages 1624 Tty: opts.Tty, 1625 RawTerminal: opts.Tty, 1626 1627 InputStream: opts.Stdin, 1628 OutputStream: opts.Stdout, 1629 ErrorStream: opts.Stderr, 1630 Context: ctx, 1631 } 1632 if err := client.StartExec(exec.ID, startOpts); err != nil { 1633 return nil, fmt.Errorf("failed to start exec: %v", err) 1634 } 1635 1636 // StartExec returns after process completes, but InspectExec seems to have a delay 1637 // get in getting status code 1638 1639 const execTerminatingTimeout = 3 * time.Second 1640 start := time.Now() 1641 var res *docker.ExecInspect 1642 for (res == nil || res.Running) && time.Since(start) <= execTerminatingTimeout { 1643 res, err = client.InspectExec(exec.ID) 1644 if err != nil { 1645 return nil, fmt.Errorf("failed to inspect exec result: %v", err) 1646 } 1647 time.Sleep(50 * time.Millisecond) 1648 } 1649 1650 if res == nil || res.Running { 1651 return nil, fmt.Errorf("failed to retrieve exec result") 1652 } 1653 1654 return &drivers.ExitResult{ 1655 ExitCode: res.ExitCode, 1656 }, nil 1657 } 1658 1659 // dockerClients creates two *docker.Client, one for long running operations and 1660 // the other for shorter operations. In test / dev mode we can use ENV vars to 1661 // connect to the docker daemon. In production mode we will read docker.endpoint 1662 // from the config file. 
1663 func (d *Driver) dockerClients() (*docker.Client, *docker.Client, error) { 1664 createClientsLock.Lock() 1665 defer createClientsLock.Unlock() 1666 1667 if client != nil && waitClient != nil { 1668 return client, waitClient, nil 1669 } 1670 1671 var err error 1672 1673 // Only initialize the client if it hasn't yet been done 1674 if client == nil { 1675 client, err = d.newDockerClient(dockerTimeout) 1676 if err != nil { 1677 return nil, nil, err 1678 } 1679 } 1680 1681 // Only initialize the waitClient if it hasn't yet been done 1682 if waitClient == nil { 1683 waitClient, err = d.newDockerClient(0 * time.Minute) 1684 if err != nil { 1685 return nil, nil, err 1686 } 1687 } 1688 1689 return client, waitClient, nil 1690 } 1691 1692 // newDockerClient creates a new *docker.Client with a configurable timeout 1693 func (d *Driver) newDockerClient(timeout time.Duration) (*docker.Client, error) { 1694 var err error 1695 var merr multierror.Error 1696 var newClient *docker.Client 1697 1698 // Default to using whatever is configured in docker.endpoint. If this is 1699 // not specified we'll fall back on NewClientFromEnv which reads config from 1700 // the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and 1701 // DOCKER_CERT_PATH. This allows us to lock down the config in production 1702 // but also accept the standard ENV configs for dev and test. 1703 dockerEndpoint := d.config.Endpoint 1704 if dockerEndpoint != "" { 1705 cert := d.config.TLS.Cert 1706 key := d.config.TLS.Key 1707 ca := d.config.TLS.CA 1708 1709 if cert+key+ca != "" { 1710 d.logger.Debug("using TLS client connection", "endpoint", dockerEndpoint) 1711 newClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) 1712 if err != nil { 1713 merr.Errors = append(merr.Errors, err) 1714 } 1715 } else { 1716 d.logger.Debug("using standard client connection", "endpoint", dockerEndpoint) 1717 newClient, err = docker.NewClient(dockerEndpoint) 1718 if err != nil { 1719 merr.Errors = append(merr.Errors, err) 1720 } 1721 } 1722 } else { 1723 d.logger.Debug("using client connection initialized from environment") 1724 newClient, err = docker.NewClientFromEnv() 1725 if err != nil { 1726 merr.Errors = append(merr.Errors, err) 1727 } 1728 } 1729 1730 if timeout != 0 && newClient != nil { 1731 newClient.SetTimeout(timeout) 1732 } 1733 return newClient, merr.ErrorOrNil() 1734 } 1735 1736 func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) { 1737 var ulimits []docker.ULimit 1738 1739 for name, ulimitRaw := range ulimitsRaw { 1740 if len(ulimitRaw) == 0 { 1741 return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %q, cannot be empty", name, ulimitRaw) 1742 } 1743 // hard limit is optional 1744 if !strings.Contains(ulimitRaw, ":") { 1745 ulimitRaw = ulimitRaw + ":" + ulimitRaw 1746 } 1747 1748 splitted := strings.SplitN(ulimitRaw, ":", 2) 1749 if len(splitted) < 2 { 1750 return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %v", name, ulimitRaw) 1751 } 1752 soft, err := strconv.Atoi(splitted[0]) 1753 if err != nil { 1754 return []docker.ULimit{}, fmt.Errorf("Malformed soft ulimit %v: %v", name, ulimitRaw) 1755 } 1756 hard, err := strconv.Atoi(splitted[1]) 1757 if err != nil { 1758 return []docker.ULimit{}, fmt.Errorf("Malformed hard ulimit %v: %v", name, ulimitRaw) 1759 } 1760 1761 ulimit := docker.ULimit{ 1762 Name: name, 1763 Soft: int64(soft), 1764 Hard: int64(hard), 1765 } 1766 ulimits = append(ulimits, ulimit) 1767 } 1768 return ulimits, nil 1769 } 1770 1771 
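// Example (illustrative): given ulimitsRaw of
// map[string]string{"nofile": "65536:131072", "nproc": "4096"} (as set by the
// task's "ulimit" config), sliceMergeUlimit returns
// docker.ULimit{Name: "nofile", Soft: 65536, Hard: 131072} and
// docker.ULimit{Name: "nproc", Soft: 4096, Hard: 4096}; a value without ":"
// is used for both the soft and the hard limit.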
func isDockerTransientError(err error) bool {
	if err == nil {
		return false
	}

	errMsg := err.Error()
	for _, te := range dockerTransientErrs {
		if strings.Contains(errMsg, te) {
			return true
		}
	}

	return false
}
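// The transient-error check above mirrors the retry pattern used by
// startContainer and createContainer. A minimal sketch of that pattern
// (illustrative only; exampleRetryLoop is a hypothetical helper, not part of
// the driver):
func exampleRetryLoop(op func() error) error {
	attempted := 0
	for {
		err := op()
		if err == nil {
			return nil
		}
		if isDockerTransientError(err) && attempted < 5 {
			attempted++
			// backoff schedule: 200ms, 800ms, 3.2s, 12.8s, 51.2s
			time.Sleep(nextBackoff(attempted))
			continue
		}
		return recoverableErrTimeouts(err)
	}
}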