github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/drivers/shared/executor/executor_linux.go (about) 1 // +build linux 2 3 package executor 4 5 import ( 6 "context" 7 "fmt" 8 "io" 9 "os" 10 "os/exec" 11 "path" 12 "path/filepath" 13 "strings" 14 "syscall" 15 "time" 16 17 "github.com/armon/circbuf" 18 "github.com/hashicorp/consul-template/signals" 19 hclog "github.com/hashicorp/go-hclog" 20 "github.com/hashicorp/nomad/client/allocdir" 21 "github.com/hashicorp/nomad/client/stats" 22 cstructs "github.com/hashicorp/nomad/client/structs" 23 shelpers "github.com/hashicorp/nomad/helper/stats" 24 "github.com/hashicorp/nomad/helper/uuid" 25 "github.com/hashicorp/nomad/nomad/structs" 26 "github.com/hashicorp/nomad/plugins/drivers" 27 "github.com/opencontainers/runc/libcontainer" 28 "github.com/opencontainers/runc/libcontainer/cgroups" 29 lconfigs "github.com/opencontainers/runc/libcontainer/configs" 30 ldevices "github.com/opencontainers/runc/libcontainer/devices" 31 "github.com/opencontainers/runc/libcontainer/specconv" 32 lutils "github.com/opencontainers/runc/libcontainer/utils" 33 "github.com/syndtr/gocapability/capability" 34 "golang.org/x/sys/unix" 35 ) 36 37 const ( 38 defaultCgroupParent = "/nomad" 39 ) 40 41 var ( 42 // ExecutorCgroupMeasuredMemStats is the list of memory stats captured by the executor 43 ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"} 44 45 // ExecutorCgroupMeasuredCpuStats is the list of CPU stats captures by the executor 46 ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"} 47 ) 48 49 // LibcontainerExecutor implements an Executor with the runc/libcontainer api 50 type LibcontainerExecutor struct { 51 id string 52 command *ExecCommand 53 54 logger hclog.Logger 55 56 totalCpuStats *stats.CpuStats 57 userCpuStats *stats.CpuStats 58 systemCpuStats *stats.CpuStats 59 pidCollector *pidCollector 60 61 container libcontainer.Container 62 userProc *libcontainer.Process 63 userProcExited chan interface{} 64 exitState *ProcessState 65 } 66 67 func NewExecutorWithIsolation(logger hclog.Logger) Executor { 68 logger = logger.Named("isolated_executor") 69 if err := shelpers.Init(); err != nil { 70 logger.Error("unable to initialize stats", "error", err) 71 } 72 return &LibcontainerExecutor{ 73 id: strings.Replace(uuid.Generate(), "-", "_", -1), 74 logger: logger, 75 totalCpuStats: stats.NewCpuStats(), 76 userCpuStats: stats.NewCpuStats(), 77 systemCpuStats: stats.NewCpuStats(), 78 pidCollector: newPidCollector(logger), 79 } 80 } 81 82 // Launch creates a new container in libcontainer and starts a new process with it 83 func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, error) { 84 l.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " ")) 85 86 if command.Resources == nil { 87 command.Resources = &drivers.Resources{ 88 NomadResources: &structs.AllocatedTaskResources{}, 89 } 90 } 91 92 l.command = command 93 94 // create a new factory which will store the container state in the allocDir 95 factory, err := libcontainer.New( 96 path.Join(command.TaskDir, "../alloc/container"), 97 libcontainer.Cgroupfs, 98 // note that os.Args[0] refers to the executor shim typically 99 // and first args arguments is ignored now due 100 // until https://github.com/opencontainers/runc/pull/1888 is merged 101 libcontainer.InitArgs(os.Args[0], "libcontainer-shim"), 102 ) 103 if err != nil { 104 return nil, fmt.Errorf("failed to create factory: %v", err) 105 } 106 107 // A container groups processes under the same isolation enforcement 108 containerCfg, err := newLibcontainerConfig(command) 109 if err != nil { 110 return nil, fmt.Errorf("failed to configure container(%s): %v", l.id, err) 111 } 112 113 container, err := factory.Create(l.id, containerCfg) 114 if err != nil { 115 return nil, fmt.Errorf("failed to create container(%s): %v", l.id, err) 116 } 117 l.container = container 118 119 // Look up the binary path and make it executable 120 absPath, err := lookupTaskBin(command) 121 122 if err != nil { 123 return nil, err 124 } 125 126 if err := makeExecutable(absPath); err != nil { 127 return nil, err 128 } 129 130 path := absPath 131 132 // Ensure that the path is contained in the chroot, and find it relative to the container 133 rel, err := filepath.Rel(command.TaskDir, path) 134 if err != nil { 135 return nil, fmt.Errorf("failed to determine relative path base=%q target=%q: %v", command.TaskDir, path, err) 136 } 137 138 // Turn relative-to-chroot path into absolute path to avoid 139 // libcontainer trying to resolve the binary using $PATH. 140 // Do *not* use filepath.Join as it will translate ".."s returned by 141 // filepath.Rel. Prepending "/" will cause the path to be rooted in the 142 // chroot which is the desired behavior. 143 path = "/" + rel 144 145 combined := append([]string{path}, command.Args...) 146 stdout, err := command.Stdout() 147 if err != nil { 148 return nil, err 149 } 150 stderr, err := command.Stderr() 151 if err != nil { 152 return nil, err 153 } 154 155 l.logger.Debug("launching", "command", command.Cmd, "args", strings.Join(command.Args, " ")) 156 157 // the task process will be started by the container 158 process := &libcontainer.Process{ 159 Args: combined, 160 Env: command.Env, 161 Stdout: stdout, 162 Stderr: stderr, 163 Init: true, 164 } 165 166 if command.User != "" { 167 process.User = command.User 168 } 169 l.userProc = process 170 171 l.totalCpuStats = stats.NewCpuStats() 172 l.userCpuStats = stats.NewCpuStats() 173 l.systemCpuStats = stats.NewCpuStats() 174 175 // Starts the task 176 if err := container.Run(process); err != nil { 177 container.Destroy() 178 return nil, err 179 } 180 181 pid, err := process.Pid() 182 if err != nil { 183 container.Destroy() 184 return nil, err 185 } 186 187 // start a goroutine to wait on the process to complete, so Wait calls can 188 // be multiplexed 189 l.userProcExited = make(chan interface{}) 190 go l.pidCollector.collectPids(l.userProcExited, l.getAllPids) 191 go l.wait() 192 193 return &ProcessState{ 194 Pid: pid, 195 ExitCode: -1, 196 Time: time.Now(), 197 }, nil 198 } 199 200 func (l *LibcontainerExecutor) getAllPids() (map[int]*nomadPid, error) { 201 pids, err := l.container.Processes() 202 if err != nil { 203 return nil, err 204 } 205 nPids := make(map[int]*nomadPid) 206 for _, pid := range pids { 207 nPids[pid] = &nomadPid{ 208 pid: pid, 209 cpuStatsTotal: stats.NewCpuStats(), 210 cpuStatsUser: stats.NewCpuStats(), 211 cpuStatsSys: stats.NewCpuStats(), 212 } 213 } 214 return nPids, nil 215 } 216 217 // Wait waits until a process has exited and returns it's exitcode and errors 218 func (l *LibcontainerExecutor) Wait(ctx context.Context) (*ProcessState, error) { 219 select { 220 case <-ctx.Done(): 221 return nil, ctx.Err() 222 case <-l.userProcExited: 223 return l.exitState, nil 224 } 225 } 226 227 func (l *LibcontainerExecutor) wait() { 228 defer close(l.userProcExited) 229 230 ps, err := l.userProc.Wait() 231 if err != nil { 232 // If the process has exited before we called wait an error is returned 233 // the process state is embedded in the error 234 if exitErr, ok := err.(*exec.ExitError); ok { 235 ps = exitErr.ProcessState 236 } else { 237 l.logger.Error("failed to call wait on user process", "error", err) 238 l.exitState = &ProcessState{Pid: 0, ExitCode: 1, Time: time.Now()} 239 return 240 } 241 } 242 243 l.command.Close() 244 245 exitCode := 1 246 var signal int 247 if status, ok := ps.Sys().(syscall.WaitStatus); ok { 248 exitCode = status.ExitStatus() 249 if status.Signaled() { 250 const exitSignalBase = 128 251 signal = int(status.Signal()) 252 exitCode = exitSignalBase + signal 253 } 254 } 255 256 l.exitState = &ProcessState{ 257 Pid: ps.Pid(), 258 ExitCode: exitCode, 259 Signal: signal, 260 Time: time.Now(), 261 } 262 } 263 264 // Shutdown stops all processes started and cleans up any resources 265 // created (such as mountpoints, devices, etc). 266 func (l *LibcontainerExecutor) Shutdown(signal string, grace time.Duration) error { 267 if l.container == nil { 268 return nil 269 } 270 271 status, err := l.container.Status() 272 if err != nil { 273 return err 274 } 275 276 defer l.container.Destroy() 277 278 if status == libcontainer.Stopped { 279 return nil 280 } 281 282 if grace > 0 { 283 if signal == "" { 284 signal = "SIGINT" 285 } 286 287 sig, ok := signals.SignalLookup[signal] 288 if !ok { 289 return fmt.Errorf("error unknown signal given for shutdown: %s", signal) 290 } 291 292 // Signal initial container processes only during graceful 293 // shutdown; hence `false` arg. 294 err = l.container.Signal(sig, false) 295 if err != nil { 296 return err 297 } 298 299 select { 300 case <-l.userProcExited: 301 return nil 302 case <-time.After(grace): 303 // Force kill all container processes after grace period, 304 // hence `true` argument. 305 if err := l.container.Signal(os.Kill, true); err != nil { 306 return err 307 } 308 } 309 } else { 310 err := l.container.Signal(os.Kill, true) 311 if err != nil { 312 return err 313 } 314 } 315 316 select { 317 case <-l.userProcExited: 318 return nil 319 case <-time.After(time.Second * 15): 320 return fmt.Errorf("process failed to exit after 15 seconds") 321 } 322 } 323 324 // UpdateResources updates the resource isolation with new values to be enforced 325 func (l *LibcontainerExecutor) UpdateResources(resources *drivers.Resources) error { 326 return nil 327 } 328 329 // Version returns the api version of the executor 330 func (l *LibcontainerExecutor) Version() (*ExecutorVersion, error) { 331 return &ExecutorVersion{Version: ExecutorVersionLatest}, nil 332 } 333 334 // Stats returns the resource statistics for processes managed by the executor 335 func (l *LibcontainerExecutor) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 336 ch := make(chan *cstructs.TaskResourceUsage) 337 go l.handleStats(ch, ctx, interval) 338 return ch, nil 339 340 } 341 342 func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) { 343 defer close(ch) 344 timer := time.NewTimer(0) 345 for { 346 select { 347 case <-ctx.Done(): 348 return 349 350 case <-timer.C: 351 timer.Reset(interval) 352 } 353 354 lstats, err := l.container.Stats() 355 if err != nil { 356 l.logger.Warn("error collecting stats", "error", err) 357 return 358 } 359 360 pidStats, err := l.pidCollector.pidStats() 361 if err != nil { 362 l.logger.Warn("error collecting stats", "error", err) 363 return 364 } 365 366 ts := time.Now() 367 stats := lstats.CgroupStats 368 369 // Memory Related Stats 370 swap := stats.MemoryStats.SwapUsage 371 maxUsage := stats.MemoryStats.Usage.MaxUsage 372 rss := stats.MemoryStats.Stats["rss"] 373 cache := stats.MemoryStats.Stats["cache"] 374 ms := &cstructs.MemoryStats{ 375 RSS: rss, 376 Cache: cache, 377 Swap: swap.Usage, 378 Usage: stats.MemoryStats.Usage.Usage, 379 MaxUsage: maxUsage, 380 KernelUsage: stats.MemoryStats.KernelUsage.Usage, 381 KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage, 382 Measured: ExecutorCgroupMeasuredMemStats, 383 } 384 385 // CPU Related Stats 386 totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage) 387 userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode) 388 kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode) 389 390 totalPercent := l.totalCpuStats.Percent(totalProcessCPUUsage) 391 cs := &cstructs.CpuStats{ 392 SystemMode: l.systemCpuStats.Percent(kernelModeTime), 393 UserMode: l.userCpuStats.Percent(userModeTime), 394 Percent: totalPercent, 395 ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods, 396 ThrottledTime: stats.CpuStats.ThrottlingData.ThrottledTime, 397 TotalTicks: l.systemCpuStats.TicksConsumed(totalPercent), 398 Measured: ExecutorCgroupMeasuredCpuStats, 399 } 400 taskResUsage := cstructs.TaskResourceUsage{ 401 ResourceUsage: &cstructs.ResourceUsage{ 402 MemoryStats: ms, 403 CpuStats: cs, 404 }, 405 Timestamp: ts.UTC().UnixNano(), 406 Pids: pidStats, 407 } 408 409 select { 410 case <-ctx.Done(): 411 return 412 case ch <- &taskResUsage: 413 } 414 415 } 416 } 417 418 // Signal sends a signal to the process managed by the executor 419 func (l *LibcontainerExecutor) Signal(s os.Signal) error { 420 return l.userProc.Signal(s) 421 } 422 423 // Exec starts an additional process inside the container 424 func (l *LibcontainerExecutor) Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error) { 425 combined := append([]string{cmd}, args...) 426 // Capture output 427 buf, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize)) 428 429 process := &libcontainer.Process{ 430 Args: combined, 431 Env: l.command.Env, 432 Stdout: buf, 433 Stderr: buf, 434 } 435 436 err := l.container.Run(process) 437 if err != nil { 438 return nil, 0, err 439 } 440 441 waitCh := make(chan *waitResult) 442 defer close(waitCh) 443 go l.handleExecWait(waitCh, process) 444 445 select { 446 case result := <-waitCh: 447 ps := result.ps 448 if result.err != nil { 449 if exitErr, ok := result.err.(*exec.ExitError); ok { 450 ps = exitErr.ProcessState 451 } else { 452 return nil, 0, result.err 453 } 454 } 455 var exitCode int 456 if status, ok := ps.Sys().(syscall.WaitStatus); ok { 457 exitCode = status.ExitStatus() 458 } 459 return buf.Bytes(), exitCode, nil 460 461 case <-time.After(time.Until(deadline)): 462 process.Signal(os.Kill) 463 return nil, 0, context.DeadlineExceeded 464 } 465 466 } 467 468 func (l *LibcontainerExecutor) newTerminalSocket() (pty func() (*os.File, error), tty *os.File, err error) { 469 parent, child, err := lutils.NewSockPair("socket") 470 if err != nil { 471 return nil, nil, fmt.Errorf("failed to create terminal: %v", err) 472 } 473 474 return func() (*os.File, error) { return lutils.RecvFd(parent) }, child, err 475 476 } 477 478 func (l *LibcontainerExecutor) ExecStreaming(ctx context.Context, cmd []string, tty bool, 479 stream drivers.ExecTaskStream) error { 480 481 // the task process will be started by the container 482 process := &libcontainer.Process{ 483 Args: cmd, 484 Env: l.userProc.Env, 485 User: l.userProc.User, 486 Init: false, 487 Cwd: "/", 488 } 489 490 execHelper := &execHelper{ 491 logger: l.logger, 492 493 newTerminal: l.newTerminalSocket, 494 setTTY: func(tty *os.File) error { 495 process.ConsoleSocket = tty 496 return nil 497 }, 498 setIO: func(stdin io.Reader, stdout, stderr io.Writer) error { 499 process.Stdin = stdin 500 process.Stdout = stdout 501 process.Stderr = stderr 502 return nil 503 }, 504 505 processStart: func() error { return l.container.Run(process) }, 506 processWait: func() (*os.ProcessState, error) { 507 return process.Wait() 508 }, 509 } 510 511 return execHelper.run(ctx, tty, stream) 512 513 } 514 515 type waitResult struct { 516 ps *os.ProcessState 517 err error 518 } 519 520 func (l *LibcontainerExecutor) handleExecWait(ch chan *waitResult, process *libcontainer.Process) { 521 ps, err := process.Wait() 522 ch <- &waitResult{ps, err} 523 } 524 525 func configureCapabilities(cfg *lconfigs.Config, command *ExecCommand) error { 526 // TODO: allow better control of these 527 // use capabilities list as prior to adopting libcontainer in 0.9 528 allCaps := supportedCaps() 529 530 // match capabilities used in Nomad 0.8 531 if command.User == "root" { 532 cfg.Capabilities = &lconfigs.Capabilities{ 533 Bounding: allCaps, 534 Permitted: allCaps, 535 Effective: allCaps, 536 Ambient: nil, 537 Inheritable: nil, 538 } 539 } else { 540 cfg.Capabilities = &lconfigs.Capabilities{ 541 Bounding: allCaps, 542 } 543 } 544 545 return nil 546 } 547 548 // supportedCaps returns a list of all supported capabilities in kernel 549 func supportedCaps() []string { 550 allCaps := []string{} 551 last := capability.CAP_LAST_CAP 552 // workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap 553 if last == capability.Cap(63) { 554 last = capability.CAP_BLOCK_SUSPEND 555 } 556 for _, cap := range capability.List() { 557 if cap > last { 558 continue 559 } 560 allCaps = append(allCaps, fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))) 561 } 562 return allCaps 563 } 564 565 func configureNamespaces(pidMode, ipcMode string) lconfigs.Namespaces { 566 namespaces := lconfigs.Namespaces{{Type: lconfigs.NEWNS}} 567 if pidMode == IsolationModePrivate { 568 namespaces = append(namespaces, lconfigs.Namespace{Type: lconfigs.NEWPID}) 569 } 570 if ipcMode == IsolationModePrivate { 571 namespaces = append(namespaces, lconfigs.Namespace{Type: lconfigs.NEWIPC}) 572 } 573 return namespaces 574 } 575 576 // configureIsolation prepares the isolation primitives of the container. 577 // The process runs in a container configured with the following: 578 // 579 // * the task directory as the chroot 580 // * dedicated mount points namespace, but shares the PID, User, domain, network namespaces with host 581 // * small subset of devices (e.g. stdout/stderr/stdin, tty, shm, pts); default to using the same set of devices as Docker 582 // * some special filesystems: `/proc`, `/sys`. Some case is given to avoid exec escaping or setting malicious values through them. 583 func configureIsolation(cfg *lconfigs.Config, command *ExecCommand) error { 584 defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV 585 586 // set the new root directory for the container 587 cfg.Rootfs = command.TaskDir 588 589 // disable pivot_root if set in the driver's configuration 590 cfg.NoPivotRoot = command.NoPivotRoot 591 592 // set up default namespaces as configured 593 cfg.Namespaces = configureNamespaces(command.ModePID, command.ModeIPC) 594 595 if command.NetworkIsolation != nil { 596 cfg.Namespaces = append(cfg.Namespaces, lconfigs.Namespace{ 597 Type: lconfigs.NEWNET, 598 Path: command.NetworkIsolation.Path, 599 }) 600 } 601 602 // paths to mask using a bind mount to /dev/null to prevent reading 603 cfg.MaskPaths = []string{ 604 "/proc/kcore", 605 "/sys/firmware", 606 } 607 608 // paths that should be remounted as readonly inside the container 609 cfg.ReadonlyPaths = []string{ 610 "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", 611 } 612 613 cfg.Devices = specconv.AllowedDevices 614 if len(command.Devices) > 0 { 615 devs, err := cmdDevices(command.Devices) 616 if err != nil { 617 return err 618 } 619 cfg.Devices = append(cfg.Devices, devs...) 620 } 621 622 cfg.Mounts = []*lconfigs.Mount{ 623 { 624 Source: "tmpfs", 625 Destination: "/dev", 626 Device: "tmpfs", 627 Flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, 628 Data: "mode=755", 629 }, 630 { 631 Source: "proc", 632 Destination: "/proc", 633 Device: "proc", 634 Flags: defaultMountFlags, 635 }, 636 { 637 Source: "devpts", 638 Destination: "/dev/pts", 639 Device: "devpts", 640 Flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, 641 Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", 642 }, 643 { 644 Device: "tmpfs", 645 Source: "shm", 646 Destination: "/dev/shm", 647 Data: "mode=1777,size=65536k", 648 Flags: defaultMountFlags, 649 }, 650 { 651 Source: "mqueue", 652 Destination: "/dev/mqueue", 653 Device: "mqueue", 654 Flags: defaultMountFlags, 655 }, 656 { 657 Source: "sysfs", 658 Destination: "/sys", 659 Device: "sysfs", 660 Flags: defaultMountFlags | syscall.MS_RDONLY, 661 }, 662 } 663 664 if len(command.Mounts) > 0 { 665 cfg.Mounts = append(cfg.Mounts, cmdMounts(command.Mounts)...) 666 } 667 668 return nil 669 } 670 671 func configureCgroups(cfg *lconfigs.Config, command *ExecCommand) error { 672 673 // If resources are not limited then manually create cgroups needed 674 if !command.ResourceLimits { 675 return configureBasicCgroups(cfg) 676 } 677 678 id := uuid.Generate() 679 cfg.Cgroups.Path = filepath.Join("/", defaultCgroupParent, id) 680 681 if command.Resources == nil || command.Resources.NomadResources == nil { 682 return nil 683 } 684 685 if mb := command.Resources.NomadResources.Memory.MemoryMB; mb > 0 { 686 // Total amount of memory allowed to consume 687 cfg.Cgroups.Resources.Memory = mb * 1024 * 1024 688 // Disable swap to avoid issues on the machine 689 var memSwappiness uint64 690 cfg.Cgroups.Resources.MemorySwappiness = &memSwappiness 691 } 692 693 cpuShares := command.Resources.NomadResources.Cpu.CpuShares 694 if cpuShares < 2 { 695 return fmt.Errorf("resources.Cpu.CpuShares must be equal to or greater than 2: %v", cpuShares) 696 } 697 698 // Set the relative CPU shares for this cgroup. 699 cfg.Cgroups.Resources.CpuShares = uint64(cpuShares) 700 701 return nil 702 } 703 704 func configureBasicCgroups(cfg *lconfigs.Config) error { 705 id := uuid.Generate() 706 707 // Manually create freezer cgroup 708 709 subsystem := "freezer" 710 711 path, err := getCgroupPathHelper(subsystem, filepath.Join(defaultCgroupParent, id)) 712 if err != nil { 713 return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err) 714 } 715 716 if err = os.MkdirAll(path, 0755); err != nil { 717 return err 718 } 719 720 cfg.Cgroups.Paths = map[string]string{ 721 subsystem: path, 722 } 723 return nil 724 } 725 726 func getCgroupPathHelper(subsystem, cgroup string) (string, error) { 727 mnt, root, err := cgroups.FindCgroupMountpointAndRoot("", subsystem) 728 if err != nil { 729 return "", err 730 } 731 732 // This is needed for nested containers, because in /proc/self/cgroup we 733 // see paths from host, which don't exist in container. 734 relCgroup, err := filepath.Rel(root, cgroup) 735 if err != nil { 736 return "", err 737 } 738 739 return filepath.Join(mnt, relCgroup), nil 740 } 741 742 func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) { 743 cfg := &lconfigs.Config{ 744 Cgroups: &lconfigs.Cgroup{ 745 Resources: &lconfigs.Resources{ 746 MemorySwappiness: nil, 747 }, 748 }, 749 Version: "1.0.0", 750 } 751 for _, device := range specconv.AllowedDevices { 752 cfg.Cgroups.Resources.Devices = append(cfg.Cgroups.Resources.Devices, &device.DeviceRule) 753 } 754 755 if err := configureCapabilities(cfg, command); err != nil { 756 return nil, err 757 } 758 if err := configureIsolation(cfg, command); err != nil { 759 return nil, err 760 } 761 if err := configureCgroups(cfg, command); err != nil { 762 return nil, err 763 } 764 return cfg, nil 765 } 766 767 // cmdDevices converts a list of driver.DeviceConfigs into excutor.Devices. 768 func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) { 769 if len(devices) == 0 { 770 return nil, nil 771 } 772 773 r := make([]*lconfigs.Device, len(devices)) 774 775 for i, d := range devices { 776 ed, err := ldevices.DeviceFromPath(d.HostPath, d.Permissions) 777 if err != nil { 778 return nil, fmt.Errorf("failed to make device out for %s: %v", d.HostPath, err) 779 } 780 ed.Path = d.TaskPath 781 r[i] = ed 782 } 783 784 return r, nil 785 } 786 787 var userMountToUnixMount = map[string]int{ 788 // Empty string maps to `rprivate` for backwards compatibility in restored 789 // older tasks, where mount propagation will not be present. 790 "": unix.MS_PRIVATE | unix.MS_REC, // rprivate 791 structs.VolumeMountPropagationPrivate: unix.MS_PRIVATE | unix.MS_REC, // rprivate 792 structs.VolumeMountPropagationHostToTask: unix.MS_SLAVE | unix.MS_REC, // rslave 793 structs.VolumeMountPropagationBidirectional: unix.MS_SHARED | unix.MS_REC, // rshared 794 } 795 796 // cmdMounts converts a list of driver.MountConfigs into excutor.Mounts. 797 func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount { 798 if len(mounts) == 0 { 799 return nil 800 } 801 802 r := make([]*lconfigs.Mount, len(mounts)) 803 804 for i, m := range mounts { 805 flags := unix.MS_BIND 806 if m.Readonly { 807 flags |= unix.MS_RDONLY 808 } 809 810 r[i] = &lconfigs.Mount{ 811 Source: m.HostPath, 812 Destination: m.TaskPath, 813 Device: "bind", 814 Flags: flags, 815 PropagationFlags: []int{userMountToUnixMount[m.PropagationMode]}, 816 } 817 } 818 819 return r 820 } 821 822 // lookupTaskBin finds the file `bin` in taskDir/local, taskDir in that order, then performs 823 // a PATH search inside taskDir. It returns an absolute path. See also executor.lookupBin 824 func lookupTaskBin(command *ExecCommand) (string, error) { 825 taskDir := command.TaskDir 826 bin := command.Cmd 827 828 // Check in the local directory 829 localDir := filepath.Join(taskDir, allocdir.TaskLocal) 830 local := filepath.Join(localDir, bin) 831 if _, err := os.Stat(local); err == nil { 832 return local, nil 833 } 834 835 // Check at the root of the task's directory 836 root := filepath.Join(taskDir, bin) 837 if _, err := os.Stat(root); err == nil { 838 return root, nil 839 } 840 841 if strings.Contains(bin, "/") { 842 return "", fmt.Errorf("file %s not found under path %s", bin, taskDir) 843 } 844 845 path := "/usr/local/bin:/usr/bin:/bin" 846 847 return lookPathIn(path, taskDir, bin) 848 } 849 850 // lookPathIn looks for a file with PATH inside the directory root. Like exec.LookPath 851 func lookPathIn(path string, root string, bin string) (string, error) { 852 // exec.LookPath(file string) 853 for _, dir := range filepath.SplitList(path) { 854 if dir == "" { 855 // match unix shell behavior, empty path element == . 856 dir = "." 857 } 858 path := filepath.Join(root, dir, bin) 859 f, err := os.Stat(path) 860 if err != nil { 861 continue 862 } 863 if m := f.Mode(); !m.IsDir() { 864 return path, nil 865 } 866 } 867 return "", fmt.Errorf("file %s not found under path %s", bin, root) 868 }