github.com/goern/docker@v1.9.0-rc1/daemon/execdriver/native/driver.go (about)

     1  // +build linux,cgo
     2  
     3  package native
     4  
     5  import (
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"strings"
    12  	"sync"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/Sirupsen/logrus"
    17  	"github.com/docker/docker/daemon/execdriver"
    18  	"github.com/docker/docker/pkg/parsers"
    19  	"github.com/docker/docker/pkg/pools"
    20  	"github.com/docker/docker/pkg/reexec"
    21  	sysinfo "github.com/docker/docker/pkg/system"
    22  	"github.com/docker/docker/pkg/term"
    23  	"github.com/opencontainers/runc/libcontainer"
    24  	"github.com/opencontainers/runc/libcontainer/apparmor"
    25  	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
    26  	"github.com/opencontainers/runc/libcontainer/configs"
    27  	"github.com/opencontainers/runc/libcontainer/system"
    28  	"github.com/opencontainers/runc/libcontainer/utils"
    29  )
    30  
    31  // Define constants for native driver
    32  const (
    33  	DriverName = "native"
    34  	Version    = "0.2"
    35  )
    36  
    37  // Driver contains all information for native driver,
    38  // it implements execdriver.Driver.
    39  type Driver struct {
    40  	root             string
    41  	initPath         string
    42  	activeContainers map[string]libcontainer.Container
    43  	machineMemory    int64
    44  	factory          libcontainer.Factory
    45  	sync.Mutex
    46  }
    47  
    48  // NewDriver returns a new native driver, called from NewDriver of execdriver.
    49  func NewDriver(root, initPath string, options []string) (*Driver, error) {
    50  	meminfo, err := sysinfo.ReadMemInfo()
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  
    55  	if err := sysinfo.MkdirAll(root, 0700); err != nil {
    56  		return nil, err
    57  	}
    58  
    59  	if apparmor.IsEnabled() {
    60  		if err := installAppArmorProfile(); err != nil {
    61  			apparmorProfiles := []string{"docker-default"}
    62  
    63  			// Allow daemon to run if loading failed, but are active
    64  			// (possibly through another run, manually, or via system startup)
    65  			for _, policy := range apparmorProfiles {
    66  				if err := hasAppArmorProfileLoaded(policy); err != nil {
    67  					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
    68  				}
    69  			}
    70  		}
    71  	}
    72  
    73  	// choose cgroup manager
    74  	// this makes sure there are no breaking changes to people
    75  	// who upgrade from versions without native.cgroupdriver opt
    76  	cgm := libcontainer.Cgroupfs
    77  	if systemd.UseSystemd() {
    78  		cgm = libcontainer.SystemdCgroups
    79  	}
    80  
    81  	// parse the options
    82  	for _, option := range options {
    83  		key, val, err := parsers.ParseKeyValueOpt(option)
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  		key = strings.ToLower(key)
    88  		switch key {
    89  		case "native.cgroupdriver":
    90  			// override the default if they set options
    91  			switch val {
    92  			case "systemd":
    93  				if systemd.UseSystemd() {
    94  					cgm = libcontainer.SystemdCgroups
    95  				} else {
    96  					// warn them that they chose the wrong driver
    97  					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
    98  				}
    99  			case "cgroupfs":
   100  				cgm = libcontainer.Cgroupfs
   101  			default:
   102  				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
   103  			}
   104  		default:
   105  			return nil, fmt.Errorf("Unknown option %s\n", key)
   106  		}
   107  	}
   108  
   109  	f, err := libcontainer.New(
   110  		root,
   111  		cgm,
   112  		libcontainer.InitPath(reexec.Self(), DriverName),
   113  	)
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	return &Driver{
   119  		root:             root,
   120  		initPath:         initPath,
   121  		activeContainers: make(map[string]libcontainer.Container),
   122  		machineMemory:    meminfo.MemTotal,
   123  		factory:          f,
   124  	}, nil
   125  }
   126  
   127  type execOutput struct {
   128  	exitCode int
   129  	err      error
   130  }
   131  
   132  // Run implements the exec driver Driver interface,
   133  // it calls libcontainer APIs to run a container.
   134  func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
   135  	// take the Command and populate the libcontainer.Config from it
   136  	container, err := d.createContainer(c, hooks)
   137  	if err != nil {
   138  		return execdriver.ExitStatus{ExitCode: -1}, err
   139  	}
   140  
   141  	p := &libcontainer.Process{
   142  		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
   143  		Env:  c.ProcessConfig.Env,
   144  		Cwd:  c.WorkingDir,
   145  		User: c.ProcessConfig.User,
   146  	}
   147  
   148  	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
   149  		return execdriver.ExitStatus{ExitCode: -1}, err
   150  	}
   151  
   152  	cont, err := d.factory.Create(c.ID, container)
   153  	if err != nil {
   154  		return execdriver.ExitStatus{ExitCode: -1}, err
   155  	}
   156  	d.Lock()
   157  	d.activeContainers[c.ID] = cont
   158  	d.Unlock()
   159  	defer func() {
   160  		cont.Destroy()
   161  		d.cleanContainer(c.ID)
   162  	}()
   163  
   164  	if err := cont.Start(p); err != nil {
   165  		return execdriver.ExitStatus{ExitCode: -1}, err
   166  	}
   167  
   168  	oom := notifyOnOOM(cont)
   169  	if hooks.Start != nil {
   170  
   171  		pid, err := p.Pid()
   172  		if err != nil {
   173  			p.Signal(os.Kill)
   174  			p.Wait()
   175  			return execdriver.ExitStatus{ExitCode: -1}, err
   176  		}
   177  		hooks.Start(&c.ProcessConfig, pid, oom)
   178  	}
   179  
   180  	waitF := p.Wait
   181  	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
   182  		// we need such hack for tracking processes with inherited fds,
   183  		// because cmd.Wait() waiting for all streams to be copied
   184  		waitF = waitInPIDHost(p, cont)
   185  	}
   186  	ps, err := waitF()
   187  	if err != nil {
   188  		execErr, ok := err.(*exec.ExitError)
   189  		if !ok {
   190  			return execdriver.ExitStatus{ExitCode: -1}, err
   191  		}
   192  		ps = execErr.ProcessState
   193  	}
   194  	cont.Destroy()
   195  	_, oomKill := <-oom
   196  	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
   197  }
   198  
   199  // notifyOnOOM returns a channel that signals if the container received an OOM notification
   200  // for any process. If it is unable to subscribe to OOM notifications then a closed
   201  // channel is returned as it will be non-blocking and return the correct result when read.
   202  func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
   203  	oom, err := container.NotifyOOM()
   204  	if err != nil {
   205  		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
   206  		c := make(chan struct{})
   207  		close(c)
   208  		return c
   209  	}
   210  	return oom
   211  }
   212  
   213  func killCgroupProcs(c libcontainer.Container) {
   214  	var procs []*os.Process
   215  	if err := c.Pause(); err != nil {
   216  		logrus.Warn(err)
   217  	}
   218  	pids, err := c.Processes()
   219  	if err != nil {
   220  		// don't care about childs if we can't get them, this is mostly because cgroup already deleted
   221  		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
   222  	}
   223  	for _, pid := range pids {
   224  		if p, err := os.FindProcess(pid); err == nil {
   225  			procs = append(procs, p)
   226  			if err := p.Kill(); err != nil {
   227  				logrus.Warn(err)
   228  			}
   229  		}
   230  	}
   231  	if err := c.Resume(); err != nil {
   232  		logrus.Warn(err)
   233  	}
   234  	for _, p := range procs {
   235  		if _, err := p.Wait(); err != nil {
   236  			logrus.Warn(err)
   237  		}
   238  	}
   239  }
   240  
   241  func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
   242  	return func() (*os.ProcessState, error) {
   243  		pid, err := p.Pid()
   244  		if err != nil {
   245  			return nil, err
   246  		}
   247  
   248  		process, err := os.FindProcess(pid)
   249  		s, err := process.Wait()
   250  		if err != nil {
   251  			execErr, ok := err.(*exec.ExitError)
   252  			if !ok {
   253  				return s, err
   254  			}
   255  			s = execErr.ProcessState
   256  		}
   257  		killCgroupProcs(c)
   258  		p.Wait()
   259  		return s, err
   260  	}
   261  }
   262  
   263  // Kill implements the exec driver Driver interface.
   264  func (d *Driver) Kill(c *execdriver.Command, sig int) error {
   265  	d.Lock()
   266  	active := d.activeContainers[c.ID]
   267  	d.Unlock()
   268  	if active == nil {
   269  		return fmt.Errorf("active container for %s does not exist", c.ID)
   270  	}
   271  	state, err := active.State()
   272  	if err != nil {
   273  		return err
   274  	}
   275  	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
   276  }
   277  
   278  // Pause implements the exec driver Driver interface,
   279  // it calls libcontainer API to pause a container.
   280  func (d *Driver) Pause(c *execdriver.Command) error {
   281  	d.Lock()
   282  	active := d.activeContainers[c.ID]
   283  	d.Unlock()
   284  	if active == nil {
   285  		return fmt.Errorf("active container for %s does not exist", c.ID)
   286  	}
   287  	return active.Pause()
   288  }
   289  
   290  // Unpause implements the exec driver Driver interface,
   291  // it calls libcontainer API to unpause a container.
   292  func (d *Driver) Unpause(c *execdriver.Command) error {
   293  	d.Lock()
   294  	active := d.activeContainers[c.ID]
   295  	d.Unlock()
   296  	if active == nil {
   297  		return fmt.Errorf("active container for %s does not exist", c.ID)
   298  	}
   299  	return active.Resume()
   300  }
   301  
   302  // Terminate implements the exec driver Driver interface.
   303  func (d *Driver) Terminate(c *execdriver.Command) error {
   304  	defer d.cleanContainer(c.ID)
   305  	container, err := d.factory.Load(c.ID)
   306  	if err != nil {
   307  		return err
   308  	}
   309  	defer container.Destroy()
   310  	state, err := container.State()
   311  	if err != nil {
   312  		return err
   313  	}
   314  	pid := state.InitProcessPid
   315  	currentStartTime, err := system.GetProcessStartTime(pid)
   316  	if err != nil {
   317  		return err
   318  	}
   319  	if state.InitProcessStartTime == currentStartTime {
   320  		err = syscall.Kill(pid, 9)
   321  		syscall.Wait4(pid, nil, 0, nil)
   322  	}
   323  	return err
   324  }
   325  
   326  // Info implements the exec driver Driver interface.
   327  func (d *Driver) Info(id string) execdriver.Info {
   328  	return &info{
   329  		ID:     id,
   330  		driver: d,
   331  	}
   332  }
   333  
   334  // Name implements the exec driver Driver interface.
   335  func (d *Driver) Name() string {
   336  	return fmt.Sprintf("%s-%s", DriverName, Version)
   337  }
   338  
   339  // GetPidsForContainer implements the exec driver Driver interface.
   340  func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
   341  	d.Lock()
   342  	active := d.activeContainers[id]
   343  	d.Unlock()
   344  
   345  	if active == nil {
   346  		return nil, fmt.Errorf("active container for %s does not exist", id)
   347  	}
   348  	return active.Processes()
   349  }
   350  
   351  func (d *Driver) cleanContainer(id string) error {
   352  	d.Lock()
   353  	delete(d.activeContainers, id)
   354  	d.Unlock()
   355  	return os.RemoveAll(filepath.Join(d.root, id))
   356  }
   357  
   358  func (d *Driver) createContainerRoot(id string) error {
   359  	return os.MkdirAll(filepath.Join(d.root, id), 0655)
   360  }
   361  
   362  // Clean implements the exec driver Driver interface.
   363  func (d *Driver) Clean(id string) error {
   364  	return os.RemoveAll(filepath.Join(d.root, id))
   365  }
   366  
   367  // Stats implements the exec driver Driver interface.
   368  func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
   369  	d.Lock()
   370  	c := d.activeContainers[id]
   371  	d.Unlock()
   372  	if c == nil {
   373  		return nil, execdriver.ErrNotRunning
   374  	}
   375  	now := time.Now()
   376  	stats, err := c.Stats()
   377  	if err != nil {
   378  		return nil, err
   379  	}
   380  	memoryLimit := c.Config().Cgroups.Memory
   381  	// if the container does not have any memory limit specified set the
   382  	// limit to the machines memory
   383  	if memoryLimit == 0 {
   384  		memoryLimit = d.machineMemory
   385  	}
   386  	return &execdriver.ResourceStats{
   387  		Stats:       stats,
   388  		Read:        now,
   389  		MemoryLimit: memoryLimit,
   390  	}, nil
   391  }
   392  
   393  // TtyConsole implements the exec driver Terminal interface.
   394  type TtyConsole struct {
   395  	console libcontainer.Console
   396  }
   397  
   398  // NewTtyConsole returns a new TtyConsole struct.
   399  func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
   400  	tty := &TtyConsole{
   401  		console: console,
   402  	}
   403  
   404  	if err := tty.AttachPipes(pipes); err != nil {
   405  		tty.Close()
   406  		return nil, err
   407  	}
   408  
   409  	return tty, nil
   410  }
   411  
   412  // Resize implements Resize method of Terminal interface
   413  func (t *TtyConsole) Resize(h, w int) error {
   414  	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
   415  }
   416  
   417  // AttachPipes attaches given pipes to TtyConsole
   418  func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
   419  	go func() {
   420  		if wb, ok := pipes.Stdout.(interface {
   421  			CloseWriters() error
   422  		}); ok {
   423  			defer wb.CloseWriters()
   424  		}
   425  
   426  		pools.Copy(pipes.Stdout, t.console)
   427  	}()
   428  
   429  	if pipes.Stdin != nil {
   430  		go func() {
   431  			pools.Copy(t.console, pipes.Stdin)
   432  
   433  			pipes.Stdin.Close()
   434  		}()
   435  	}
   436  
   437  	return nil
   438  }
   439  
   440  // Close implements Close method of Terminal interface
   441  func (t *TtyConsole) Close() error {
   442  	return t.console.Close()
   443  }
   444  
   445  func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
   446  
   447  	rootuid, err := container.HostUID()
   448  	if err != nil {
   449  		return err
   450  	}
   451  
   452  	if processConfig.Tty {
   453  		cons, err := p.NewConsole(rootuid)
   454  		if err != nil {
   455  			return err
   456  		}
   457  		term, err := NewTtyConsole(cons, pipes)
   458  		if err != nil {
   459  			return err
   460  		}
   461  		processConfig.Terminal = term
   462  		return nil
   463  	}
   464  	// not a tty--set up stdio pipes
   465  	term := &execdriver.StdConsole{}
   466  	processConfig.Terminal = term
   467  
   468  	// if we are not in a user namespace, there is no reason to go through
   469  	// the hassle of setting up os-level pipes with proper (remapped) ownership
   470  	// so we will do the prior shortcut for non-userns containers
   471  	if rootuid == 0 {
   472  		p.Stdout = pipes.Stdout
   473  		p.Stderr = pipes.Stderr
   474  
   475  		r, w, err := os.Pipe()
   476  		if err != nil {
   477  			return err
   478  		}
   479  		if pipes.Stdin != nil {
   480  			go func() {
   481  				io.Copy(w, pipes.Stdin)
   482  				w.Close()
   483  			}()
   484  			p.Stdin = r
   485  		}
   486  		return nil
   487  	}
   488  
   489  	// if we have user namespaces enabled (rootuid != 0), we will set
   490  	// up os pipes for stderr, stdout, stdin so we can chown them to
   491  	// the proper ownership to allow for proper access to the underlying
   492  	// fds
   493  	var fds []int
   494  
   495  	//setup stdout
   496  	r, w, err := os.Pipe()
   497  	if err != nil {
   498  		return err
   499  	}
   500  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   501  	if pipes.Stdout != nil {
   502  		go io.Copy(pipes.Stdout, r)
   503  	}
   504  	term.Closers = append(term.Closers, r)
   505  	p.Stdout = w
   506  
   507  	//setup stderr
   508  	r, w, err = os.Pipe()
   509  	if err != nil {
   510  		return err
   511  	}
   512  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   513  	if pipes.Stderr != nil {
   514  		go io.Copy(pipes.Stderr, r)
   515  	}
   516  	term.Closers = append(term.Closers, r)
   517  	p.Stderr = w
   518  
   519  	//setup stdin
   520  	r, w, err = os.Pipe()
   521  	if err != nil {
   522  		return err
   523  	}
   524  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   525  	if pipes.Stdin != nil {
   526  		go func() {
   527  			io.Copy(w, pipes.Stdin)
   528  			w.Close()
   529  		}()
   530  		p.Stdin = r
   531  	}
   532  	for _, fd := range fds {
   533  		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
   534  			return fmt.Errorf("Failed to chown pipes fd: %v", err)
   535  		}
   536  	}
   537  	return nil
   538  }
   539  
   540  // SupportsHooks implements the execdriver Driver interface.
   541  // The libcontainer/runC-based native execdriver does exploit the hook mechanism
   542  func (d *Driver) SupportsHooks() bool {
   543  	return true
   544  }