github.com/bigcommerce/nomad@v0.9.3-bc/drivers/shared/executor/executor_linux.go

// +build linux

package executor

import (
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"strings"
	"syscall"
	"time"

	"github.com/armon/circbuf"
	"github.com/hashicorp/consul-template/signals"
	hclog "github.com/hashicorp/go-hclog"
	multierror "github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/client/allocdir"
	"github.com/hashicorp/nomad/client/stats"
	cstructs "github.com/hashicorp/nomad/client/structs"
	shelpers "github.com/hashicorp/nomad/helper/stats"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/drivers"
	"github.com/opencontainers/runc/libcontainer"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
	lconfigs "github.com/opencontainers/runc/libcontainer/configs"
	ldevices "github.com/opencontainers/runc/libcontainer/devices"
	lutils "github.com/opencontainers/runc/libcontainer/utils"
	"github.com/syndtr/gocapability/capability"
	"golang.org/x/sys/unix"
)

const (
	defaultCgroupParent = "nomad"
)

var (
	// ExecutorCgroupMeasuredMemStats is the list of memory stats captured by the executor
	ExecutorCgroupMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Usage", "Max Usage", "Kernel Usage", "Kernel Max Usage"}

	// ExecutorCgroupMeasuredCpuStats is the list of CPU stats captured by the executor
	ExecutorCgroupMeasuredCpuStats = []string{"System Mode", "User Mode", "Throttled Periods", "Throttled Time", "Percent"}
)

// LibcontainerExecutor implements an Executor with the runc/libcontainer api
type LibcontainerExecutor struct {
	id      string
	command *ExecCommand

	logger hclog.Logger

	totalCpuStats  *stats.CpuStats
	userCpuStats   *stats.CpuStats
	systemCpuStats *stats.CpuStats
	pidCollector   *pidCollector

	container      libcontainer.Container
	userProc       *libcontainer.Process
	userProcExited chan interface{}
	exitState      *ProcessState
}

func NewExecutorWithIsolation(logger hclog.Logger) Executor {
	logger = logger.Named("isolated_executor")
	if err := shelpers.Init(); err != nil {
		logger.Error("unable to initialize stats", "error", err)
	}
	return &LibcontainerExecutor{
		id:             strings.Replace(uuid.Generate(), "-", "_", -1),
		logger:         logger,
		totalCpuStats:  stats.NewCpuStats(),
		userCpuStats:   stats.NewCpuStats(),
		systemCpuStats: stats.NewCpuStats(),
		pidCollector:   newPidCollector(logger),
	}
}

// Launch creates a new container in libcontainer and starts a new process with it
func (l *LibcontainerExecutor) Launch(command *ExecCommand) (*ProcessState, error) {
	l.logger.Trace("preparing to launch command", "command", command.Cmd, "args", strings.Join(command.Args, " "))

	if command.Resources == nil {
		command.Resources = &drivers.Resources{
			NomadResources: &structs.AllocatedTaskResources{},
		}
	}

	l.command = command

	// Move to the root cgroup until process is started
	subsystems, err := cgroups.GetAllSubsystems()
	if err != nil {
		return nil, err
	}
	if err := JoinRootCgroup(subsystems); err != nil {
		return nil, err
	}

	// create a new factory which will store the container state in the allocDir
	factory, err := libcontainer.New(
		path.Join(command.TaskDir, "../alloc/container"),
		libcontainer.Cgroupfs,
		// note that os.Args[0] typically refers to the executor shim;
		// the first InitArgs argument is ignored for now, until
		// https://github.com/opencontainers/runc/pull/1888 is merged
		libcontainer.InitArgs(os.Args[0], "libcontainer-shim"),
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create factory: %v", err)
	}

	// A container groups processes under the same isolation enforcement
	containerCfg, err := newLibcontainerConfig(command)
	if err != nil {
		return nil, fmt.Errorf("failed to configure container(%s): %v", l.id, err)
	}

	container, err := factory.Create(l.id, containerCfg)
	if err != nil {
		return nil, fmt.Errorf("failed to create container(%s): %v", l.id, err)
	}
	l.container = container

	// Look up the binary path and make it executable
	absPath, err := lookupTaskBin(command)
	if err != nil {
		return nil, err
	}

	if err := makeExecutable(absPath); err != nil {
		return nil, err
	}

	path := absPath

	// Ensure that the path is contained in the chroot, and find it relative to the container
	rel, err := filepath.Rel(command.TaskDir, path)
	if err != nil {
		return nil, fmt.Errorf("failed to determine relative path base=%q target=%q: %v", command.TaskDir, path, err)
	}

	// Turn the relative-to-chroot path into an absolute path to avoid
	// libcontainer trying to resolve the binary using $PATH.
	// Do *not* use filepath.Join as it will translate ".."s returned by
	// filepath.Rel. Prepending "/" will cause the path to be rooted in the
	// chroot which is the desired behavior.
	path = "/" + rel

	combined := append([]string{path}, command.Args...)
	stdout, err := command.Stdout()
	if err != nil {
		return nil, err
	}
	stderr, err := command.Stderr()
	if err != nil {
		return nil, err
	}

	l.logger.Debug("launching", "command", command.Cmd, "args", strings.Join(command.Args, " "))

	// the task process will be started by the container
	process := &libcontainer.Process{
		Args:   combined,
		Env:    command.Env,
		Stdout: stdout,
		Stderr: stderr,
		Init:   true,
	}

	if command.User != "" {
		process.User = command.User
	}
	l.userProc = process

	l.totalCpuStats = stats.NewCpuStats()
	l.userCpuStats = stats.NewCpuStats()
	l.systemCpuStats = stats.NewCpuStats()

	// Starts the task
	if err := container.Run(process); err != nil {
		container.Destroy()
		return nil, err
	}

	pid, err := process.Pid()
	if err != nil {
		container.Destroy()
		return nil, err
	}

	// Join process cgroups
	containerState, err := container.State()
	if err != nil {
		l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err)
	}
	if err := cgroups.EnterPid(containerState.CgroupPaths, os.Getpid()); err != nil {
		l.logger.Error("error entering user process cgroups", "executor_pid", os.Getpid(), "error", err)
	}

	// start a goroutine to wait on the process to complete, so Wait calls can
	// be multiplexed
	l.userProcExited = make(chan interface{})
	go l.pidCollector.collectPids(l.userProcExited, l.getAllPids)
	go l.wait()

	return &ProcessState{
		Pid:      pid,
		ExitCode: -1,
		Time:     time.Now(),
	}, nil
}

func (l *LibcontainerExecutor) getAllPids() (map[int]*nomadPid, error) {
	pids, err := l.container.Processes()
	if err != nil {
		return nil, err
	}
	nPids := make(map[int]*nomadPid)
	for _, pid := range pids {
		nPids[pid] = &nomadPid{
			pid:           pid,
			cpuStatsTotal: stats.NewCpuStats(),
			cpuStatsUser:  stats.NewCpuStats(),
			cpuStatsSys:   stats.NewCpuStats(),
		}
	}
	return nPids, nil
}

// Wait waits until a process has exited and returns its exit code and errors
func (l *LibcontainerExecutor) Wait(ctx context.Context) (*ProcessState, error) {
	select {
	case <-ctx.Done():
		return nil, ctx.Err()
	case <-l.userProcExited:
		return l.exitState, nil
	}
}

func (l *LibcontainerExecutor) wait() {
	defer close(l.userProcExited)

	ps, err := l.userProc.Wait()
	if err != nil {
		// If the process has exited before we called wait, an error is
		// returned and the process state is embedded in the error
		if exitErr, ok := err.(*exec.ExitError); ok {
			ps = exitErr.ProcessState
		} else {
			l.logger.Error("failed to call wait on user process", "error", err)
			l.exitState = &ProcessState{Pid: 0, ExitCode: 1, Time: time.Now()}
			return
		}
	}

	l.command.Close()

	exitCode := 1
	var signal int
	if status, ok := ps.Sys().(syscall.WaitStatus); ok {
		exitCode = status.ExitStatus()
		if status.Signaled() {
			const exitSignalBase = 128
			signal = int(status.Signal())
			exitCode = exitSignalBase + signal
		}
	}

	l.exitState = &ProcessState{
		Pid:      ps.Pid(),
		ExitCode: exitCode,
		Signal:   signal,
		Time:     time.Now(),
	}
}

// Shutdown stops all processes started and cleans up any resources
// created (such as mountpoints, devices, etc).
func (l *LibcontainerExecutor) Shutdown(signal string, grace time.Duration) error {
	if l.container == nil {
		return nil
	}

	// move executor to root cgroup
	subsystems, err := cgroups.GetAllSubsystems()
	if err != nil {
		return err
	}
	if err := JoinRootCgroup(subsystems); err != nil {
		return err
	}

	status, err := l.container.Status()
	if err != nil {
		return err
	}

	defer l.container.Destroy()

	if status == libcontainer.Stopped {
		return nil
	}

	if grace > 0 {
		if signal == "" {
			signal = "SIGINT"
		}

		sig, ok := signals.SignalLookup[signal]
		if !ok {
			return fmt.Errorf("error unknown signal given for shutdown: %s", signal)
		}

		// Signal initial container processes only during graceful
		// shutdown; hence `false` arg.
		err = l.container.Signal(sig, false)
		if err != nil {
			return err
		}

		select {
		case <-l.userProcExited:
			return nil
		case <-time.After(grace):
			// Force kill all container processes after grace period,
			// hence `true` argument.
			if err := l.container.Signal(os.Kill, true); err != nil {
				return err
			}
		}
	} else {
		if err := l.container.Signal(os.Kill, true); err != nil {
			return err
		}
	}

	select {
	case <-l.userProcExited:
		return nil
	case <-time.After(time.Second * 15):
		return fmt.Errorf("process failed to exit after 15 seconds")
	}
}

// UpdateResources updates the resource isolation with new values to be enforced
func (l *LibcontainerExecutor) UpdateResources(resources *drivers.Resources) error {
	return nil
}

// Version returns the api version of the executor
func (l *LibcontainerExecutor) Version() (*ExecutorVersion, error) {
	return &ExecutorVersion{Version: ExecutorVersionLatest}, nil
}

// Stats returns the resource statistics for processes managed by the executor
func (l *LibcontainerExecutor) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) {
	ch := make(chan *cstructs.TaskResourceUsage)
	go l.handleStats(ch, ctx, interval)
	return ch, nil
}

func (l *LibcontainerExecutor) handleStats(ch chan *cstructs.TaskResourceUsage, ctx context.Context, interval time.Duration) {
	defer close(ch)
	timer := time.NewTimer(0)
	for {
		select {
		case <-ctx.Done():
			return

		case <-timer.C:
			timer.Reset(interval)
		}

		lstats, err := l.container.Stats()
		if err != nil {
			l.logger.Warn("error collecting stats", "error", err)
			return
		}

		pidStats, err := l.pidCollector.pidStats()
		if err != nil {
			l.logger.Warn("error collecting stats", "error", err)
			return
		}

		ts := time.Now()
		stats := lstats.CgroupStats

		// Memory Related Stats
		swap := stats.MemoryStats.SwapUsage
		maxUsage := stats.MemoryStats.Usage.MaxUsage
		rss := stats.MemoryStats.Stats["rss"]
		cache := stats.MemoryStats.Stats["cache"]
		ms := &cstructs.MemoryStats{
			RSS:            rss,
			Cache:          cache,
			Swap:           swap.Usage,
			Usage:          stats.MemoryStats.Usage.Usage,
			MaxUsage:       maxUsage,
			KernelUsage:    stats.MemoryStats.KernelUsage.Usage,
			KernelMaxUsage: stats.MemoryStats.KernelUsage.MaxUsage,
			Measured:       ExecutorCgroupMeasuredMemStats,
		}

		// CPU Related Stats
		totalProcessCPUUsage := float64(stats.CpuStats.CpuUsage.TotalUsage)
		userModeTime := float64(stats.CpuStats.CpuUsage.UsageInUsermode)
		kernelModeTime := float64(stats.CpuStats.CpuUsage.UsageInKernelmode)

		totalPercent := l.totalCpuStats.Percent(totalProcessCPUUsage)
		cs := &cstructs.CpuStats{
			SystemMode:       l.systemCpuStats.Percent(kernelModeTime),
			UserMode:         l.userCpuStats.Percent(userModeTime),
			Percent:          totalPercent,
			ThrottledPeriods: stats.CpuStats.ThrottlingData.ThrottledPeriods,
			ThrottledTime:    stats.CpuStats.ThrottlingData.ThrottledTime,
			TotalTicks:       l.systemCpuStats.TicksConsumed(totalPercent),
			Measured:         ExecutorCgroupMeasuredCpuStats,
		}
		taskResUsage := cstructs.TaskResourceUsage{
			ResourceUsage: &cstructs.ResourceUsage{
				MemoryStats: ms,
				CpuStats:    cs,
			},
			Timestamp: ts.UTC().UnixNano(),
			Pids:      pidStats,
		}

		select {
		case <-ctx.Done():
			return
		case ch <- &taskResUsage:
		}
	}
}

// Signal sends a signal to the process managed by the executor
func (l *LibcontainerExecutor) Signal(s os.Signal) error {
	return l.userProc.Signal(s)
}

// Exec starts an additional process inside the container
func (l *LibcontainerExecutor) Exec(deadline time.Time, cmd string, args []string) ([]byte, int, error) {
	combined := append([]string{cmd}, args...)
	// Capture output
	buf, _ := circbuf.NewBuffer(int64(drivers.CheckBufSize))

	process := &libcontainer.Process{
		Args:   combined,
		Env:    l.command.Env,
		Stdout: buf,
		Stderr: buf,
	}

	err := l.container.Run(process)
	if err != nil {
		return nil, 0, err
	}

	waitCh := make(chan *waitResult)
	defer close(waitCh)
	go l.handleExecWait(waitCh, process)

	select {
	case result := <-waitCh:
		ps := result.ps
		if result.err != nil {
			if exitErr, ok := result.err.(*exec.ExitError); ok {
				ps = exitErr.ProcessState
			} else {
				return nil, 0, result.err
			}
		}
		var exitCode int
		if status, ok := ps.Sys().(syscall.WaitStatus); ok {
			exitCode = status.ExitStatus()
		}
		return buf.Bytes(), exitCode, nil

	case <-time.After(time.Until(deadline)):
		process.Signal(os.Kill)
		return nil, 0, context.DeadlineExceeded
	}
}

func (l *LibcontainerExecutor) newTerminalSocket() (pty func() (*os.File, error), tty *os.File, err error) {
	parent, child, err := lutils.NewSockPair("socket")
	if err != nil {
		return nil, nil, fmt.Errorf("failed to create terminal: %v", err)
	}

	return func() (*os.File, error) { return lutils.RecvFd(parent) }, child, err
}

func (l *LibcontainerExecutor) ExecStreaming(ctx context.Context, cmd []string, tty bool,
	stream drivers.ExecTaskStream) error {

	// the task process will be started by the container
	process := &libcontainer.Process{
		Args: cmd,
		Env:  l.userProc.Env,
		User: l.userProc.User,
		Init: false,
		Cwd:  "/",
	}

	execHelper := &execHelper{
		logger: l.logger,

		newTerminal: l.newTerminalSocket,
		setTTY: func(tty *os.File) error {
			process.ConsoleSocket = tty
			return nil
		},
		setIO: func(stdin io.Reader, stdout, stderr io.Writer) error {
			process.Stdin = stdin
			process.Stdout = stdout
			process.Stderr = stderr
			return nil
		},

		processStart: func() error { return l.container.Run(process) },
		processWait: func() (*os.ProcessState, error) {
			return process.Wait()
		},
	}

	return execHelper.run(ctx, tty, stream)
}

type waitResult struct {
	ps  *os.ProcessState
	err error
}

func (l *LibcontainerExecutor) handleExecWait(ch chan *waitResult, process *libcontainer.Process) {
	ps, err := process.Wait()
	ch <- &waitResult{ps, err}
}

func configureCapabilities(cfg *lconfigs.Config, command *ExecCommand) error {
	// TODO: allow better control of these
	// use the capabilities list from before adopting libcontainer in 0.9
	allCaps := supportedCaps()

	// match capabilities used in Nomad 0.8
	if command.User == "root" {
		cfg.Capabilities = &lconfigs.Capabilities{
			Bounding:    allCaps,
			Permitted:   allCaps,
			Effective:   allCaps,
			Ambient:     nil,
			Inheritable: nil,
		}
	} else {
		cfg.Capabilities = &lconfigs.Capabilities{
			Bounding: allCaps,
		}
	}

	return nil
}

// supportedCaps returns a list of all capabilities supported by the kernel
func supportedCaps() []string {
	allCaps := []string{}
	last := capability.CAP_LAST_CAP
	// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
	if last == capability.Cap(63) {
		last = capability.CAP_BLOCK_SUSPEND
	}
	for _, cap := range capability.List() {
		if cap > last {
			continue
		}
		allCaps = append(allCaps, fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())))
	}
	return allCaps
}

// configureIsolation prepares the isolation primitives of the container.
// The process runs in a container configured with the following:
//
// * the task directory as the chroot
// * dedicated mount points namespace, but shares the PID, User, domain, network namespaces with host
// * small subset of devices (e.g. stdout/stderr/stdin, tty, shm, pts); defaults to using the same set of devices as Docker
// * some special filesystems: `/proc`, `/sys`. Some care is taken to prevent the exec'd process from escaping or setting malicious values through them.
func configureIsolation(cfg *lconfigs.Config, command *ExecCommand) error {
	defaultMountFlags := syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV

	// set the new root directory for the container
	cfg.Rootfs = command.TaskDir

	// launch with mount namespace
	cfg.Namespaces = lconfigs.Namespaces{
		{Type: lconfigs.NEWNS},
	}

	// paths to mask using a bind mount to /dev/null to prevent reading
	cfg.MaskPaths = []string{
		"/proc/kcore",
		"/sys/firmware",
	}

	// paths that should be remounted as readonly inside the container
	cfg.ReadonlyPaths = []string{
		"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
	}

	cfg.Devices = lconfigs.DefaultAutoCreatedDevices
	if len(command.Devices) > 0 {
		devs, err := cmdDevices(command.Devices)
		if err != nil {
			return err
		}
		cfg.Devices = append(cfg.Devices, devs...)
	}

	cfg.Mounts = []*lconfigs.Mount{
		{
			Source:      "tmpfs",
			Destination: "/dev",
			Device:      "tmpfs",
			Flags:       syscall.MS_NOSUID | syscall.MS_STRICTATIME,
			Data:        "mode=755",
		},
		{
			Source:      "proc",
			Destination: "/proc",
			Device:      "proc",
			Flags:       defaultMountFlags,
		},
		{
			Source:      "devpts",
			Destination: "/dev/pts",
			Device:      "devpts",
			Flags:       syscall.MS_NOSUID | syscall.MS_NOEXEC,
			Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
		},
		{
			Device:      "tmpfs",
			Source:      "shm",
			Destination: "/dev/shm",
			Data:        "mode=1777,size=65536k",
			Flags:       defaultMountFlags,
		},
		{
			Source:      "mqueue",
			Destination: "/dev/mqueue",
			Device:      "mqueue",
			Flags:       defaultMountFlags,
		},
		{
			Source:      "sysfs",
			Destination: "/sys",
			Device:      "sysfs",
			Flags:       defaultMountFlags | syscall.MS_RDONLY,
		},
	}

	if len(command.Mounts) > 0 {
		cfg.Mounts = append(cfg.Mounts, cmdMounts(command.Mounts)...)
	}

	return nil
}

func configureCgroups(cfg *lconfigs.Config, command *ExecCommand) error {

	// If resources are not limited then manually create cgroups needed
	if !command.ResourceLimits {
		return configureBasicCgroups(cfg)
	}

	id := uuid.Generate()
	cfg.Cgroups.Path = filepath.Join("/", defaultCgroupParent, id)

	if command.Resources == nil || command.Resources.NomadResources == nil {
		return nil
	}

	if mb := command.Resources.NomadResources.Memory.MemoryMB; mb > 0 {
		// Total amount of memory the task is allowed to consume
		cfg.Cgroups.Resources.Memory = mb * 1024 * 1024
		// Disable swap to avoid issues on the machine
		var memSwappiness uint64
		cfg.Cgroups.Resources.MemorySwappiness = &memSwappiness
	}

	cpuShares := command.Resources.NomadResources.Cpu.CpuShares
	if cpuShares < 2 {
		return fmt.Errorf("resources.Cpu.CpuShares must be equal to or greater than 2: %v", cpuShares)
	}

	// Set the relative CPU shares for this cgroup.
	cfg.Cgroups.Resources.CpuShares = uint64(cpuShares)

	return nil
}

func configureBasicCgroups(cfg *lconfigs.Config) error {
	id := uuid.Generate()

	// Manually create freezer cgroup
	cfg.Cgroups.Paths = map[string]string{}
	root, err := cgroups.FindCgroupMountpointDir()
	if err != nil {
		return err
	}

	if _, err := os.Stat(root); err != nil {
		return err
	}

	freezer := cgroupFs.FreezerGroup{}
	subsystem := freezer.Name()
	path, err := cgroups.FindCgroupMountpoint("", subsystem)
	if err != nil {
		return fmt.Errorf("failed to find %s cgroup mountpoint: %v", subsystem, err)
	}
	// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
	path = filepath.Join(root, filepath.Base(path), defaultCgroupParent, id)

	if err = os.MkdirAll(path, 0755); err != nil {
		return err
	}

	cfg.Cgroups.Paths[subsystem] = path
	return nil
}

func newLibcontainerConfig(command *ExecCommand) (*lconfigs.Config, error) {
	cfg := &lconfigs.Config{
		Cgroups: &lconfigs.Cgroup{
			Resources: &lconfigs.Resources{
				AllowAllDevices:  nil,
				MemorySwappiness: nil,
				AllowedDevices:   lconfigs.DefaultAllowedDevices,
			},
		},
		Version: "1.0.0",
	}

	if err := configureCapabilities(cfg, command); err != nil {
		return nil, err
	}
	if err := configureIsolation(cfg, command); err != nil {
		return nil, err
	}
	if err := configureCgroups(cfg, command); err != nil {
		return nil, err
	}
	return cfg, nil
}

// JoinRootCgroup moves the current process to the cgroups of the init process
func JoinRootCgroup(subsystems []string) error {
	mErrs := new(multierror.Error)
	paths := map[string]string{}
	for _, s := range subsystems {
		mnt, _, err := cgroups.FindCgroupMountpointAndRoot("", s)
		if err != nil {
			multierror.Append(mErrs, fmt.Errorf("error getting cgroup path for subsystem: %s", s))
			continue
		}

		paths[s] = mnt
	}

	err := cgroups.EnterPid(paths, os.Getpid())
	if err != nil {
		multierror.Append(mErrs, err)
	}

	return mErrs.ErrorOrNil()
}

// cmdDevices converts a list of driver.DeviceConfigs into libcontainer devices.
func cmdDevices(devices []*drivers.DeviceConfig) ([]*lconfigs.Device, error) {
	if len(devices) == 0 {
		return nil, nil
	}

	r := make([]*lconfigs.Device, len(devices))

	for i, d := range devices {
		ed, err := ldevices.DeviceFromPath(d.HostPath, d.Permissions)
		if err != nil {
			return nil, fmt.Errorf("failed to make device out for %s: %v", d.HostPath, err)
		}
		ed.Path = d.TaskPath
		r[i] = ed
	}

	return r, nil
}

// cmdMounts converts a list of driver.MountConfigs into libcontainer mounts.
func cmdMounts(mounts []*drivers.MountConfig) []*lconfigs.Mount {
	if len(mounts) == 0 {
		return nil
	}

	r := make([]*lconfigs.Mount, len(mounts))

	for i, m := range mounts {
		flags := unix.MS_BIND
		if m.Readonly {
			flags |= unix.MS_RDONLY
		}
		r[i] = &lconfigs.Mount{
			Source:      m.HostPath,
			Destination: m.TaskPath,
			Device:      "bind",
			Flags:       flags,
		}
	}

	return r
}

// lookupTaskBin finds the file `bin`, checking taskDir/local and taskDir in that
// order, then performing a PATH search inside taskDir. It returns an absolute
// path. See also executor.lookupBin
func lookupTaskBin(command *ExecCommand) (string, error) {
	taskDir := command.TaskDir
	bin := command.Cmd

	// Check in the local directory
	localDir := filepath.Join(taskDir, allocdir.TaskLocal)
	local := filepath.Join(localDir, bin)
	if _, err := os.Stat(local); err == nil {
		return local, nil
	}

	// Check at the root of the task's directory
	root := filepath.Join(taskDir, bin)
	if _, err := os.Stat(root); err == nil {
		return root, nil
	}

	if strings.Contains(bin, "/") {
		return "", fmt.Errorf("file %s not found under path %s", bin, taskDir)
	}

	// Find the PATH
	path := "/usr/local/bin:/usr/bin:/bin"
	for _, e := range command.Env {
		if strings.HasPrefix(e, "PATH=") {
			path = e[5:]
		}
	}

	return lookPathIn(path, taskDir, bin)
}

// lookPathIn searches the PATH-style list of directories `path` for `bin`, with
// each directory rooted inside `root`. Similar to exec.LookPath.
func lookPathIn(path string, root string, bin string) (string, error) {
	for _, dir := range filepath.SplitList(path) {
		if dir == "" {
			// match unix shell behavior, empty path element == .
			dir = "."
		}
		path := filepath.Join(root, dir, bin)
		f, err := os.Stat(path)
		if err != nil {
			continue
		}
		if m := f.Mode(); !m.IsDir() {
			return path, nil
		}
	}
	return "", fmt.Errorf("file %s not found under path %s", bin, root)
}