github.com/jthurman42/docker@v1.6.0-rc1/daemon/execdriver/lxc/driver.go (about) 1 package lxc 2 3 import ( 4 "encoding/json" 5 "errors" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "os" 10 "os/exec" 11 "path" 12 "path/filepath" 13 "strconv" 14 "strings" 15 "sync" 16 "syscall" 17 "time" 18 19 log "github.com/Sirupsen/logrus" 20 "github.com/docker/docker/daemon/execdriver" 21 sysinfo "github.com/docker/docker/pkg/system" 22 "github.com/docker/docker/pkg/term" 23 "github.com/docker/docker/utils" 24 "github.com/docker/libcontainer" 25 "github.com/docker/libcontainer/cgroups" 26 "github.com/docker/libcontainer/configs" 27 "github.com/docker/libcontainer/system" 28 "github.com/docker/libcontainer/user" 29 "github.com/kr/pty" 30 ) 31 32 const DriverName = "lxc" 33 34 var ErrExec = errors.New("Unsupported: Exec is not supported by the lxc driver") 35 36 type driver struct { 37 root string // root path for the driver to use 38 initPath string 39 apparmor bool 40 sharedRoot bool 41 activeContainers map[string]*activeContainer 42 machineMemory int64 43 sync.Mutex 44 } 45 46 type activeContainer struct { 47 container *configs.Config 48 cmd *exec.Cmd 49 } 50 51 func NewDriver(root, initPath string, apparmor bool) (*driver, error) { 52 // setup unconfined symlink 53 if err := linkLxcStart(root); err != nil { 54 return nil, err 55 } 56 meminfo, err := sysinfo.ReadMemInfo() 57 if err != nil { 58 return nil, err 59 } 60 return &driver{ 61 apparmor: apparmor, 62 root: root, 63 initPath: initPath, 64 sharedRoot: rootIsShared(), 65 activeContainers: make(map[string]*activeContainer), 66 machineMemory: meminfo.MemTotal, 67 }, nil 68 } 69 70 func (d *driver) Name() string { 71 version := d.version() 72 return fmt.Sprintf("%s-%s", DriverName, version) 73 } 74 75 func (d *driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (execdriver.ExitStatus, error) { 76 var ( 77 term execdriver.Terminal 78 err error 79 dataPath = d.containerDir(c.ID) 80 ) 81 82 if c.ProcessConfig.Tty { 83 term, err = NewTtyConsole(&c.ProcessConfig, pipes) 84 } else { 85 term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes) 86 } 87 c.ProcessConfig.Terminal = term 88 container, err := d.createContainer(c) 89 if err != nil { 90 return execdriver.ExitStatus{ExitCode: -1}, err 91 } 92 d.Lock() 93 d.activeContainers[c.ID] = &activeContainer{ 94 container: container, 95 cmd: &c.ProcessConfig.Cmd, 96 } 97 d.Unlock() 98 99 c.Mounts = append(c.Mounts, execdriver.Mount{ 100 Source: d.initPath, 101 Destination: c.InitPath, 102 Writable: false, 103 Private: true, 104 }) 105 106 if err := d.generateEnvConfig(c); err != nil { 107 return execdriver.ExitStatus{ExitCode: -1}, err 108 } 109 configPath, err := d.generateLXCConfig(c) 110 if err != nil { 111 return execdriver.ExitStatus{ExitCode: -1}, err 112 } 113 params := []string{ 114 "lxc-start", 115 "-n", c.ID, 116 "-f", configPath, 117 } 118 if c.Network.ContainerID != "" { 119 params = append(params, 120 "--share-net", c.Network.ContainerID, 121 ) 122 } 123 if c.Ipc != nil { 124 if c.Ipc.ContainerID != "" { 125 params = append(params, 126 "--share-ipc", c.Ipc.ContainerID, 127 ) 128 } else if c.Ipc.HostIpc { 129 params = append(params, 130 "--share-ipc", "1", 131 ) 132 } 133 } 134 135 params = append(params, 136 "--", 137 c.InitPath, 138 ) 139 if c.Network.Interface != nil { 140 params = append(params, 141 "-g", c.Network.Interface.Gateway, 142 "-i", fmt.Sprintf("%s/%d", c.Network.Interface.IPAddress, c.Network.Interface.IPPrefixLen), 143 ) 144 } 145 params = append(params, 146 "-mtu", strconv.Itoa(c.Network.Mtu), 147 ) 148 149 if c.ProcessConfig.User != "" { 150 params = append(params, "-u", c.ProcessConfig.User) 151 } 152 153 if c.ProcessConfig.Privileged { 154 if d.apparmor { 155 params[0] = path.Join(d.root, "lxc-start-unconfined") 156 157 } 158 params = append(params, "-privileged") 159 } 160 161 if c.WorkingDir != "" { 162 params = append(params, "-w", c.WorkingDir) 163 } 164 165 params = append(params, "--", c.ProcessConfig.Entrypoint) 166 params = append(params, c.ProcessConfig.Arguments...) 167 168 if d.sharedRoot { 169 // lxc-start really needs / to be non-shared, or all kinds of stuff break 170 // when lxc-start unmount things and those unmounts propagate to the main 171 // mount namespace. 172 // What we really want is to clone into a new namespace and then 173 // mount / MS_REC|MS_SLAVE, but since we can't really clone or fork 174 // without exec in go we have to do this horrible shell hack... 175 shellString := 176 "mount --make-rslave /; exec " + 177 utils.ShellQuoteArguments(params) 178 179 params = []string{ 180 "unshare", "-m", "--", "/bin/sh", "-c", shellString, 181 } 182 } 183 log.Debugf("lxc params %s", params) 184 var ( 185 name = params[0] 186 arg = params[1:] 187 ) 188 aname, err := exec.LookPath(name) 189 if err != nil { 190 aname = name 191 } 192 c.ProcessConfig.Path = aname 193 c.ProcessConfig.Args = append([]string{name}, arg...) 194 195 if err := createDeviceNodes(c.Rootfs, c.AutoCreatedDevices); err != nil { 196 return execdriver.ExitStatus{ExitCode: -1}, err 197 } 198 199 if err := c.ProcessConfig.Start(); err != nil { 200 return execdriver.ExitStatus{ExitCode: -1}, err 201 } 202 203 var ( 204 waitErr error 205 waitLock = make(chan struct{}) 206 ) 207 208 go func() { 209 if err := c.ProcessConfig.Wait(); err != nil { 210 if _, ok := err.(*exec.ExitError); !ok { // Do not propagate the error if it's simply a status code != 0 211 waitErr = err 212 } 213 } 214 close(waitLock) 215 }() 216 217 terminate := func(terr error) (execdriver.ExitStatus, error) { 218 if c.ProcessConfig.Process != nil { 219 c.ProcessConfig.Process.Kill() 220 c.ProcessConfig.Wait() 221 } 222 return execdriver.ExitStatus{ExitCode: -1}, terr 223 } 224 // Poll lxc for RUNNING status 225 pid, err := d.waitForStart(c, waitLock) 226 if err != nil { 227 return terminate(err) 228 } 229 230 cgroupPaths, err := cgroupPaths(c.ID) 231 if err != nil { 232 return terminate(err) 233 } 234 235 state := &libcontainer.State{ 236 InitProcessPid: pid, 237 CgroupPaths: cgroupPaths, 238 } 239 240 f, err := os.Create(filepath.Join(dataPath, "state.json")) 241 if err != nil { 242 return terminate(err) 243 } 244 defer f.Close() 245 246 if err := json.NewEncoder(f).Encode(state); err != nil { 247 return terminate(err) 248 } 249 250 c.ContainerPid = pid 251 252 if startCallback != nil { 253 log.Debugf("Invoking startCallback") 254 startCallback(&c.ProcessConfig, pid) 255 } 256 257 oomKill := false 258 oomKillNotification, err := notifyOnOOM(cgroupPaths) 259 260 <-waitLock 261 262 if err == nil { 263 _, oomKill = <-oomKillNotification 264 log.Debugf("oomKill error %s waitErr %s", oomKill, waitErr) 265 } else { 266 log.Warnf("Your kernel does not support OOM notifications: %s", err) 267 } 268 269 // check oom error 270 exitCode := getExitCode(c) 271 if oomKill { 272 exitCode = 137 273 } 274 return execdriver.ExitStatus{ExitCode: exitCode, OOMKilled: oomKill}, waitErr 275 } 276 277 // copy from libcontainer 278 func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) { 279 dir := paths["memory"] 280 if dir == "" { 281 return nil, fmt.Errorf("There is no path for %q in state", "memory") 282 } 283 oomControl, err := os.Open(filepath.Join(dir, "memory.oom_control")) 284 if err != nil { 285 return nil, err 286 } 287 fd, _, syserr := syscall.RawSyscall(syscall.SYS_EVENTFD2, 0, syscall.FD_CLOEXEC, 0) 288 if syserr != 0 { 289 oomControl.Close() 290 return nil, syserr 291 } 292 293 eventfd := os.NewFile(fd, "eventfd") 294 295 eventControlPath := filepath.Join(dir, "cgroup.event_control") 296 data := fmt.Sprintf("%d %d", eventfd.Fd(), oomControl.Fd()) 297 if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil { 298 eventfd.Close() 299 oomControl.Close() 300 return nil, err 301 } 302 ch := make(chan struct{}) 303 go func() { 304 defer func() { 305 close(ch) 306 eventfd.Close() 307 oomControl.Close() 308 }() 309 buf := make([]byte, 8) 310 for { 311 if _, err := eventfd.Read(buf); err != nil { 312 return 313 } 314 // When a cgroup is destroyed, an event is sent to eventfd. 315 // So if the control path is gone, return instead of notifying. 316 if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { 317 return 318 } 319 ch <- struct{}{} 320 } 321 }() 322 return ch, nil 323 } 324 325 // createContainer populates and configures the container type with the 326 // data provided by the execdriver.Command 327 func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) { 328 container := execdriver.InitContainer(c) 329 if err := execdriver.SetupCgroups(container, c); err != nil { 330 return nil, err 331 } 332 return container, nil 333 } 334 335 // Return an map of susbystem -> container cgroup 336 func cgroupPaths(containerId string) (map[string]string, error) { 337 subsystems, err := cgroups.GetAllSubsystems() 338 if err != nil { 339 return nil, err 340 } 341 log.Debugf("subsystems: %s", subsystems) 342 paths := make(map[string]string) 343 for _, subsystem := range subsystems { 344 cgroupRoot, cgroupDir, err := findCgroupRootAndDir(subsystem) 345 log.Debugf("cgroup path %s %s", cgroupRoot, cgroupDir) 346 if err != nil { 347 //unsupported subystem 348 continue 349 } 350 path := filepath.Join(cgroupRoot, cgroupDir, "lxc", containerId) 351 paths[subsystem] = path 352 } 353 354 return paths, nil 355 } 356 357 // this is copy from old libcontainer nodes.go 358 func createDeviceNodes(rootfs string, nodesToCreate []*configs.Device) error { 359 oldMask := syscall.Umask(0000) 360 defer syscall.Umask(oldMask) 361 362 for _, node := range nodesToCreate { 363 if err := createDeviceNode(rootfs, node); err != nil { 364 return err 365 } 366 } 367 return nil 368 } 369 370 // Creates the device node in the rootfs of the container. 371 func createDeviceNode(rootfs string, node *configs.Device) error { 372 var ( 373 dest = filepath.Join(rootfs, node.Path) 374 parent = filepath.Dir(dest) 375 ) 376 377 if err := os.MkdirAll(parent, 0755); err != nil { 378 return err 379 } 380 381 fileMode := node.FileMode 382 switch node.Type { 383 case 'c': 384 fileMode |= syscall.S_IFCHR 385 case 'b': 386 fileMode |= syscall.S_IFBLK 387 default: 388 return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) 389 } 390 391 if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) { 392 return fmt.Errorf("mknod %s %s", node.Path, err) 393 } 394 395 if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { 396 return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) 397 } 398 399 return nil 400 } 401 402 // setupUser changes the groups, gid, and uid for the user inside the container 403 // copy from libcontainer, cause not it's private 404 func setupUser(userSpec string) error { 405 // Set up defaults. 406 defaultExecUser := user.ExecUser{ 407 Uid: syscall.Getuid(), 408 Gid: syscall.Getgid(), 409 Home: "/", 410 } 411 passwdPath, err := user.GetPasswdPath() 412 if err != nil { 413 return err 414 } 415 groupPath, err := user.GetGroupPath() 416 if err != nil { 417 return err 418 } 419 execUser, err := user.GetExecUserPath(userSpec, &defaultExecUser, passwdPath, groupPath) 420 if err != nil { 421 return err 422 } 423 if err := syscall.Setgroups(execUser.Sgids); err != nil { 424 return err 425 } 426 if err := system.Setgid(execUser.Gid); err != nil { 427 return err 428 } 429 if err := system.Setuid(execUser.Uid); err != nil { 430 return err 431 } 432 // if we didn't get HOME already, set it based on the user's HOME 433 if envHome := os.Getenv("HOME"); envHome == "" { 434 if err := os.Setenv("HOME", execUser.Home); err != nil { 435 return err 436 } 437 } 438 return nil 439 } 440 441 /// Return the exit code of the process 442 // if the process has not exited -1 will be returned 443 func getExitCode(c *execdriver.Command) int { 444 if c.ProcessConfig.ProcessState == nil { 445 return -1 446 } 447 return c.ProcessConfig.ProcessState.Sys().(syscall.WaitStatus).ExitStatus() 448 } 449 450 func (d *driver) Kill(c *execdriver.Command, sig int) error { 451 return KillLxc(c.ID, sig) 452 } 453 454 func (d *driver) Pause(c *execdriver.Command) error { 455 _, err := exec.LookPath("lxc-freeze") 456 if err == nil { 457 output, errExec := exec.Command("lxc-freeze", "-n", c.ID).CombinedOutput() 458 if errExec != nil { 459 return fmt.Errorf("Err: %s Output: %s", errExec, output) 460 } 461 } 462 463 return err 464 } 465 466 func (d *driver) Unpause(c *execdriver.Command) error { 467 _, err := exec.LookPath("lxc-unfreeze") 468 if err == nil { 469 output, errExec := exec.Command("lxc-unfreeze", "-n", c.ID).CombinedOutput() 470 if errExec != nil { 471 return fmt.Errorf("Err: %s Output: %s", errExec, output) 472 } 473 } 474 475 return err 476 } 477 478 func (d *driver) Terminate(c *execdriver.Command) error { 479 return KillLxc(c.ID, 9) 480 } 481 482 func (d *driver) version() string { 483 var ( 484 version string 485 output []byte 486 err error 487 ) 488 if _, errPath := exec.LookPath("lxc-version"); errPath == nil { 489 output, err = exec.Command("lxc-version").CombinedOutput() 490 } else { 491 output, err = exec.Command("lxc-start", "--version").CombinedOutput() 492 } 493 if err == nil { 494 version = strings.TrimSpace(string(output)) 495 if parts := strings.SplitN(version, ":", 2); len(parts) == 2 { 496 version = strings.TrimSpace(parts[1]) 497 } 498 } 499 return version 500 } 501 502 func KillLxc(id string, sig int) error { 503 var ( 504 err error 505 output []byte 506 ) 507 _, err = exec.LookPath("lxc-kill") 508 if err == nil { 509 output, err = exec.Command("lxc-kill", "-n", id, strconv.Itoa(sig)).CombinedOutput() 510 } else { 511 output, err = exec.Command("lxc-stop", "-k", "-n", id, strconv.Itoa(sig)).CombinedOutput() 512 } 513 if err != nil { 514 return fmt.Errorf("Err: %s Output: %s", err, output) 515 } 516 return nil 517 } 518 519 // wait for the process to start and return the pid for the process 520 func (d *driver) waitForStart(c *execdriver.Command, waitLock chan struct{}) (int, error) { 521 var ( 522 err error 523 output []byte 524 ) 525 // We wait for the container to be fully running. 526 // Timeout after 5 seconds. In case of broken pipe, just retry. 527 // Note: The container can run and finish correctly before 528 // the end of this loop 529 for now := time.Now(); time.Since(now) < 5*time.Second; { 530 select { 531 case <-waitLock: 532 // If the process dies while waiting for it, just return 533 return -1, nil 534 default: 535 } 536 537 output, err = d.getInfo(c.ID) 538 if err == nil { 539 info, err := parseLxcInfo(string(output)) 540 if err != nil { 541 return -1, err 542 } 543 if info.Running { 544 return info.Pid, nil 545 } 546 } 547 time.Sleep(50 * time.Millisecond) 548 } 549 return -1, execdriver.ErrNotRunning 550 } 551 552 func (d *driver) getInfo(id string) ([]byte, error) { 553 return exec.Command("lxc-info", "-n", id).CombinedOutput() 554 } 555 556 type info struct { 557 ID string 558 driver *driver 559 } 560 561 func (i *info) IsRunning() bool { 562 var running bool 563 564 output, err := i.driver.getInfo(i.ID) 565 if err != nil { 566 log.Errorf("Error getting info for lxc container %s: %s (%s)", i.ID, err, output) 567 return false 568 } 569 if strings.Contains(string(output), "RUNNING") { 570 running = true 571 } 572 return running 573 } 574 575 func (d *driver) Info(id string) execdriver.Info { 576 return &info{ 577 ID: id, 578 driver: d, 579 } 580 } 581 582 func findCgroupRootAndDir(subsystem string) (string, string, error) { 583 cgroupRoot, err := cgroups.FindCgroupMountpoint(subsystem) 584 if err != nil { 585 return "", "", err 586 } 587 588 cgroupDir, err := cgroups.GetThisCgroupDir(subsystem) 589 if err != nil { 590 return "", "", err 591 } 592 return cgroupRoot, cgroupDir, nil 593 } 594 595 func (d *driver) GetPidsForContainer(id string) ([]int, error) { 596 pids := []int{} 597 598 // cpu is chosen because it is the only non optional subsystem in cgroups 599 subsystem := "cpu" 600 cgroupRoot, cgroupDir, err := findCgroupRootAndDir(subsystem) 601 if err != nil { 602 return pids, err 603 } 604 605 filename := filepath.Join(cgroupRoot, cgroupDir, id, "tasks") 606 if _, err := os.Stat(filename); os.IsNotExist(err) { 607 // With more recent lxc versions use, cgroup will be in lxc/ 608 filename = filepath.Join(cgroupRoot, cgroupDir, "lxc", id, "tasks") 609 } 610 611 output, err := ioutil.ReadFile(filename) 612 if err != nil { 613 return pids, err 614 } 615 for _, p := range strings.Split(string(output), "\n") { 616 if len(p) == 0 { 617 continue 618 } 619 pid, err := strconv.Atoi(p) 620 if err != nil { 621 return pids, fmt.Errorf("Invalid pid '%s': %s", p, err) 622 } 623 pids = append(pids, pid) 624 } 625 return pids, nil 626 } 627 628 func linkLxcStart(root string) error { 629 sourcePath, err := exec.LookPath("lxc-start") 630 if err != nil { 631 return err 632 } 633 targetPath := path.Join(root, "lxc-start-unconfined") 634 635 if _, err := os.Lstat(targetPath); err != nil && !os.IsNotExist(err) { 636 return err 637 } else if err == nil { 638 if err := os.Remove(targetPath); err != nil { 639 return err 640 } 641 } 642 return os.Symlink(sourcePath, targetPath) 643 } 644 645 // TODO: This can be moved to the mountinfo reader in the mount pkg 646 func rootIsShared() bool { 647 if data, err := ioutil.ReadFile("/proc/self/mountinfo"); err == nil { 648 for _, line := range strings.Split(string(data), "\n") { 649 cols := strings.Split(line, " ") 650 if len(cols) >= 6 && cols[4] == "/" { 651 return strings.HasPrefix(cols[6], "shared") 652 } 653 } 654 } 655 656 // No idea, probably safe to assume so 657 return true 658 } 659 660 func (d *driver) containerDir(containerId string) string { 661 return path.Join(d.root, "containers", containerId) 662 } 663 664 func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) { 665 root := path.Join(d.containerDir(c.ID), "config.lxc") 666 667 fo, err := os.Create(root) 668 if err != nil { 669 return "", err 670 } 671 defer fo.Close() 672 673 if err := LxcTemplateCompiled.Execute(fo, struct { 674 *execdriver.Command 675 AppArmor bool 676 }{ 677 Command: c, 678 AppArmor: d.apparmor, 679 }); err != nil { 680 return "", err 681 } 682 683 return root, nil 684 } 685 686 func (d *driver) generateEnvConfig(c *execdriver.Command) error { 687 data, err := json.Marshal(c.ProcessConfig.Env) 688 if err != nil { 689 return err 690 } 691 p := path.Join(d.root, "containers", c.ID, "config.env") 692 c.Mounts = append(c.Mounts, execdriver.Mount{ 693 Source: p, 694 Destination: "/.dockerenv", 695 Writable: false, 696 Private: true, 697 }) 698 699 return ioutil.WriteFile(p, data, 0600) 700 } 701 702 // Clean not implemented for lxc 703 func (d *driver) Clean(id string) error { 704 return nil 705 } 706 707 type TtyConsole struct { 708 MasterPty *os.File 709 SlavePty *os.File 710 } 711 712 func NewTtyConsole(processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes) (*TtyConsole, error) { 713 // lxc is special in that we cannot create the master outside of the container without 714 // opening the slave because we have nothing to provide to the cmd. We have to open both then do 715 // the crazy setup on command right now instead of passing the console path to lxc and telling it 716 // to open up that console. we save a couple of openfiles in the native driver because we can do 717 // this. 718 ptyMaster, ptySlave, err := pty.Open() 719 if err != nil { 720 return nil, err 721 } 722 723 tty := &TtyConsole{ 724 MasterPty: ptyMaster, 725 SlavePty: ptySlave, 726 } 727 728 if err := tty.AttachPipes(&processConfig.Cmd, pipes); err != nil { 729 tty.Close() 730 return nil, err 731 } 732 733 processConfig.Console = tty.SlavePty.Name() 734 735 return tty, nil 736 } 737 738 func (t *TtyConsole) Master() *os.File { 739 return t.MasterPty 740 } 741 742 func (t *TtyConsole) Resize(h, w int) error { 743 return term.SetWinsize(t.MasterPty.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)}) 744 } 745 746 func (t *TtyConsole) AttachPipes(command *exec.Cmd, pipes *execdriver.Pipes) error { 747 command.Stdout = t.SlavePty 748 command.Stderr = t.SlavePty 749 750 go func() { 751 if wb, ok := pipes.Stdout.(interface { 752 CloseWriters() error 753 }); ok { 754 defer wb.CloseWriters() 755 } 756 757 io.Copy(pipes.Stdout, t.MasterPty) 758 }() 759 760 if pipes.Stdin != nil { 761 command.Stdin = t.SlavePty 762 command.SysProcAttr.Setctty = true 763 764 go func() { 765 io.Copy(t.MasterPty, pipes.Stdin) 766 767 pipes.Stdin.Close() 768 }() 769 } 770 return nil 771 } 772 773 func (t *TtyConsole) Close() error { 774 t.SlavePty.Close() 775 return t.MasterPty.Close() 776 } 777 778 func (d *driver) Exec(c *execdriver.Command, processConfig *execdriver.ProcessConfig, pipes *execdriver.Pipes, startCallback execdriver.StartCallback) (int, error) { 779 return -1, ErrExec 780 } 781 782 func (d *driver) Stats(id string) (*execdriver.ResourceStats, error) { 783 return execdriver.Stats(d.containerDir(id), d.activeContainers[id].container.Cgroups.Memory, d.machineMemory) 784 }