github.com/endocode/docker@v1.4.2-0.20160113120958-46eb4700391e/daemon/execdriver/native/driver.go

     1  // +build linux,cgo
     2  
     3  package native
     4  
     5  import (
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"os"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"strings"
    13  	"sync"
    14  	"syscall"
    15  	"time"
    16  
    17  	"github.com/Sirupsen/logrus"
    18  	"github.com/docker/docker/daemon/execdriver"
    19  	"github.com/docker/docker/pkg/parsers"
    20  	"github.com/docker/docker/pkg/pools"
    21  	"github.com/docker/docker/pkg/reexec"
    22  	sysinfo "github.com/docker/docker/pkg/system"
    23  	"github.com/docker/docker/pkg/term"
    24  	"github.com/opencontainers/runc/libcontainer"
    25  	"github.com/opencontainers/runc/libcontainer/apparmor"
    26  	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
    27  	"github.com/opencontainers/runc/libcontainer/configs"
    28  	"github.com/opencontainers/runc/libcontainer/system"
    29  	"github.com/opencontainers/runc/libcontainer/utils"
    30  )
    31  
    32  // Define constants for native driver
    33  const (
    34  	DriverName = "native"
    35  	Version    = "0.2"
    36  )
    37  
    38  // Driver contains all information for the native driver;
    39  // it implements execdriver.Driver.
    40  type Driver struct {
    41  	root             string
    42  	activeContainers map[string]libcontainer.Container
    43  	machineMemory    int64
    44  	factory          libcontainer.Factory
    45  	sync.Mutex
    46  }
    47  
    48  // NewDriver returns a new native driver, called from NewDriver of execdriver.
    49  func NewDriver(root string, options []string) (*Driver, error) {
    50  	meminfo, err := sysinfo.ReadMemInfo()
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  
    55  	if err := sysinfo.MkdirAll(root, 0700); err != nil {
    56  		return nil, err
    57  	}
    58  
    59  	if apparmor.IsEnabled() {
    60  		if err := installAppArmorProfile(); err != nil {
    61  			apparmorProfiles := []string{"docker-default"}
    62  
    63  			// Allow the daemon to run if loading failed but the profiles are already
    64  			// active (possibly loaded by another run, manually, or at system startup)
    65  			for _, policy := range apparmorProfiles {
    66  				if err := hasAppArmorProfileLoaded(policy); err != nil {
    67  					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
    68  				}
    69  			}
    70  		}
    71  	}
    72  
    73  	// choose cgroup manager
    74  	// this makes sure there are no breaking changes to people
    75  	// who upgrade from versions without native.cgroupdriver opt
    76  	cgm := libcontainer.Cgroupfs
    77  
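        	// Each option arrives as a "key=value" string taken from the daemon's
        	// exec options (for example, "native.cgroupdriver=systemd" when the
        	// daemon is started with --exec-opt native.cgroupdriver=systemd).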
    78  	// parse the options
    79  	for _, option := range options {
    80  		key, val, err := parsers.ParseKeyValueOpt(option)
    81  		if err != nil {
    82  			return nil, err
    83  		}
    84  		key = strings.ToLower(key)
    85  		switch key {
    86  		case "native.cgroupdriver":
    87  			// override the default if they set options
    88  			switch val {
    89  			case "systemd":
    90  				if systemd.UseSystemd() {
    91  					cgm = libcontainer.SystemdCgroups
    92  				} else {
    93  					// warn them that they chose the wrong driver
    94  					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
    95  				}
    96  			case "cgroupfs":
    97  				cgm = libcontainer.Cgroupfs
    98  			default:
    99  				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
   100  			}
   101  		default:
   102  			return nil, fmt.Errorf("Unknown option %s", key)
   103  		}
   104  	}
   105  
   106  	f, err := libcontainer.New(
   107  		root,
   108  		cgm,
   109  		libcontainer.InitPath(reexec.Self(), DriverName),
   110  	)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  
   115  	return &Driver{
   116  		root:             root,
   117  		activeContainers: make(map[string]libcontainer.Container),
   118  		machineMemory:    meminfo.MemTotal,
   119  		factory:          f,
   120  	}, nil
   121  }
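
        // A rough sketch of how the daemon is expected to construct and use this
        // driver (the root path, options, and variable names below are illustrative,
        // not taken from this file):
        //
        //	d, err := NewDriver("/var/run/docker/execdriver/native",
        //		[]string{"native.cgroupdriver=cgroupfs"})
        //	if err != nil {
        //		return err
        //	}
        //	exitStatus, err := d.Run(command, pipes, hooks)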
   122  
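        // execOutput bundles the exit code of an exec'd process with any error
        // encountered while waiting for it.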
   123  type execOutput struct {
   124  	exitCode int
   125  	err      error
   126  }
   127  
   128  // Run implements the exec driver Driver interface;
   129  // it calls libcontainer APIs to run a container.
   130  func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
   131  	destroyed := false
   132  	var err error
   133  	c.TmpDir, err = ioutil.TempDir("", c.ID)
   134  	if err != nil {
   135  		return execdriver.ExitStatus{ExitCode: -1}, err
   136  	}
   137  	defer os.RemoveAll(c.TmpDir)
   138  
   139  	// take the Command and populate the libcontainer.Config from it
   140  	container, err := d.createContainer(c, hooks)
   141  	if err != nil {
   142  		return execdriver.ExitStatus{ExitCode: -1}, err
   143  	}
   144  
   145  	p := &libcontainer.Process{
   146  		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
   147  		Env:  c.ProcessConfig.Env,
   148  		Cwd:  c.WorkingDir,
   149  		User: c.ProcessConfig.User,
   150  	}
   151  
   152  	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
   153  		return execdriver.ExitStatus{ExitCode: -1}, err
   154  	}
   155  
   156  	cont, err := d.factory.Create(c.ID, container)
   157  	if err != nil {
   158  		return execdriver.ExitStatus{ExitCode: -1}, err
   159  	}
   160  	d.Lock()
   161  	d.activeContainers[c.ID] = cont
   162  	d.Unlock()
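        	// Ensure the container is destroyed and dropped from the active map on
        	// every exit path; `destroyed` is set once Destroy has already been called.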
   163  	defer func() {
   164  		if !destroyed {
   165  			cont.Destroy()
   166  		}
   167  		d.cleanContainer(c.ID)
   168  	}()
   169  
   170  	if err := cont.Start(p); err != nil {
   171  		return execdriver.ExitStatus{ExitCode: -1}, err
   172  	}
   173  
   174  	// 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used
   175  	// to set the 'OOMKilled' flag in state
   176  	oom := notifyOnOOM(cont)
   177  	oomKilled := notifyOnOOM(cont)
   178  	if hooks.Start != nil {
   179  		pid, err := p.Pid()
   180  		if err != nil {
   181  			p.Signal(os.Kill)
   182  			p.Wait()
   183  			return execdriver.ExitStatus{ExitCode: -1}, err
   184  		}
   185  		hooks.Start(&c.ProcessConfig, pid, oom)
   186  	}
   187  
   188  	waitF := p.Wait
   189  	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
   190  		// we need this hack to track processes with inherited fds,
   191  		// because cmd.Wait() waits for all streams to be copied
   192  		waitF = waitInPIDHost(p, cont)
   193  	}
   194  	ps, err := waitF()
   195  	if err != nil {
   196  		execErr, ok := err.(*exec.ExitError)
   197  		if !ok {
   198  			return execdriver.ExitStatus{ExitCode: -1}, err
   199  		}
   200  		ps = execErr.ProcessState
   201  	}
   202  	cont.Destroy()
   203  	destroyed = true
   204  	// oomKilled will have an oom event if any process within the container was
   205  	// OOM killed at any time, not only if the init process OOMed.
   206  	//
   207  	// Perhaps we only want the OOMKilled flag to be set if the OOM
   208  	// resulted in a container death, but there isn't a good way to do this
   209  	// because the kernel's cgroup oom notification does not provide information
   210  	// such as the PID. This could be heuristically done by checking that the OOM
   211  	// happened within some very small time slice of the container dying (and
   212  	// optionally exit-code 137), but I don't think the cgroup oom notification
   213  	// can be used to reliably determine this
   214  	//
   215  	// Even if there were multiple OOMs, it's sufficient to read one value
   216  	// because libcontainer's oom notify will discard the channel after the
   217  	// cgroup is destroyed
   218  	_, oomKill := <-oomKilled
   219  	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
   220  }
   221  
   222  // notifyOnOOM returns a channel that signals if the container received an OOM notification
   223  // for any process. If it is unable to subscribe to OOM notifications then a closed
   224  // channel is returned as it will be non-blocking and return the correct result when read.
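        // Typical use (see Run above): one subscription feeds the daemon's event
        // stream via hooks.Start, and a second is drained after the container exits
        // to decide whether to set the OOMKilled flag.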
   225  func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
   226  	oom, err := container.NotifyOOM()
   227  	if err != nil {
   228  		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
   229  		c := make(chan struct{})
   230  		close(c)
   231  		return c
   232  	}
   233  	return oom
   234  }
   235  
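        // killCgroupProcs pauses the container's cgroup, kills every process still
        // listed in it, resumes the cgroup, and then waits for the killed processes
        // to be reaped.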
   236  func killCgroupProcs(c libcontainer.Container) {
   237  	var procs []*os.Process
   238  	if err := c.Pause(); err != nil {
   239  		logrus.Warn(err)
   240  	}
   241  	pids, err := c.Processes()
   242  	if err != nil {
   243  		// don't care about children if we can't get them; this is mostly because the cgroup was already deleted
   244  		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
   245  	}
   246  	for _, pid := range pids {
   247  		if p, err := os.FindProcess(pid); err == nil {
   248  			procs = append(procs, p)
   249  			if err := p.Kill(); err != nil {
   250  				logrus.Warn(err)
   251  			}
   252  		}
   253  	}
   254  	if err := c.Resume(); err != nil {
   255  		logrus.Warn(err)
   256  	}
   257  	for _, p := range procs {
   258  		if _, err := p.Wait(); err != nil {
   259  			logrus.Warn(err)
   260  		}
   261  	}
   262  }
   263  
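        // waitInPIDHost returns a wait function for containers that do not get their
        // own PID namespace: it waits on the container's init process directly and
        // then kills and reaps any processes left behind in the container's cgroup.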
   264  func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
   265  	return func() (*os.ProcessState, error) {
   266  		pid, err := p.Pid()
   267  		if err != nil {
   268  			return nil, err
   269  		}
   270  
   271  		process, err := os.FindProcess(pid)
   272  		s, err := process.Wait()
   273  		if err != nil {
   274  			execErr, ok := err.(*exec.ExitError)
   275  			if !ok {
   276  				return s, err
   277  			}
   278  			s = execErr.ProcessState
   279  		}
   280  		killCgroupProcs(c)
   281  		p.Wait()
   282  		return s, err
   283  	}
   284  }
   285  
   286  // Kill implements the exec driver Driver interface.
   287  func (d *Driver) Kill(c *execdriver.Command, sig int) error {
   288  	d.Lock()
   289  	active := d.activeContainers[c.ID]
   290  	d.Unlock()
   291  	if active == nil {
   292  		return fmt.Errorf("active container for %s does not exist", c.ID)
   293  	}
   294  	state, err := active.State()
   295  	if err != nil {
   296  		return err
   297  	}
   298  	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
   299  }
   300  
   301  // Pause implements the exec driver Driver interface;
   302  // it calls the libcontainer API to pause a container.
   303  func (d *Driver) Pause(c *execdriver.Command) error {
   304  	d.Lock()
   305  	active := d.activeContainers[c.ID]
   306  	d.Unlock()
   307  	if active == nil {
   308  		return fmt.Errorf("active container for %s does not exist", c.ID)
   309  	}
   310  	return active.Pause()
   311  }
   312  
   313  // Unpause implements the exec driver Driver interface;
   314  // it calls the libcontainer API to resume a paused container.
   315  func (d *Driver) Unpause(c *execdriver.Command) error {
   316  	d.Lock()
   317  	active := d.activeContainers[c.ID]
   318  	d.Unlock()
   319  	if active == nil {
   320  		return fmt.Errorf("active container for %s does not exist", c.ID)
   321  	}
   322  	return active.Resume()
   323  }
   324  
   325  // Terminate implements the exec driver Driver interface.
   326  func (d *Driver) Terminate(c *execdriver.Command) error {
   327  	defer d.cleanContainer(c.ID)
   328  	container, err := d.factory.Load(c.ID)
   329  	if err != nil {
   330  		return err
   331  	}
   332  	defer container.Destroy()
   333  	state, err := container.State()
   334  	if err != nil {
   335  		return err
   336  	}
   337  	pid := state.InitProcessPid
   338  	currentStartTime, err := system.GetProcessStartTime(pid)
   339  	if err != nil {
   340  		return err
   341  	}
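        	// Only send SIGKILL if the pid still belongs to the container's init
        	// process; comparing the recorded start time guards against PID reuse.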
   342  	if state.InitProcessStartTime == currentStartTime {
   343  		err = syscall.Kill(pid, 9)
   344  		syscall.Wait4(pid, nil, 0, nil)
   345  	}
   346  	return err
   347  }
   348  
   349  // Info implements the exec driver Driver interface.
   350  func (d *Driver) Info(id string) execdriver.Info {
   351  	return &info{
   352  		ID:     id,
   353  		driver: d,
   354  	}
   355  }
   356  
   357  // Name implements the exec driver Driver interface.
   358  func (d *Driver) Name() string {
   359  	return fmt.Sprintf("%s-%s", DriverName, Version)
   360  }
   361  
   362  // GetPidsForContainer implements the exec driver Driver interface.
   363  func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
   364  	d.Lock()
   365  	active := d.activeContainers[id]
   366  	d.Unlock()
   367  
   368  	if active == nil {
   369  		return nil, fmt.Errorf("active container for %s does not exist", id)
   370  	}
   371  	return active.Processes()
   372  }
   373  
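        // cleanContainer drops the container from the active map and removes its
        // state directory under the driver root.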
   374  func (d *Driver) cleanContainer(id string) error {
   375  	d.Lock()
   376  	delete(d.activeContainers, id)
   377  	d.Unlock()
   378  	return os.RemoveAll(filepath.Join(d.root, id))
   379  }
   380  
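        // createContainerRoot creates the per-container state directory under the
        // driver root.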
   381  func (d *Driver) createContainerRoot(id string) error {
   382  	return os.MkdirAll(filepath.Join(d.root, id), 0655)
   383  }
   384  
   385  // Clean implements the exec driver Driver interface.
   386  func (d *Driver) Clean(id string) error {
   387  	return os.RemoveAll(filepath.Join(d.root, id))
   388  }
   389  
   390  // Stats implements the exec driver Driver interface.
   391  func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
   392  	d.Lock()
   393  	c := d.activeContainers[id]
   394  	d.Unlock()
   395  	if c == nil {
   396  		return nil, execdriver.ErrNotRunning
   397  	}
   398  	now := time.Now()
   399  	stats, err := c.Stats()
   400  	if err != nil {
   401  		return nil, err
   402  	}
   403  	memoryLimit := c.Config().Cgroups.Resources.Memory
   404  	// if the container does not have any memory limit specified, set the
   405  	// limit to the machine's total memory
   406  	if memoryLimit == 0 {
   407  		memoryLimit = d.machineMemory
   408  	}
   409  	return &execdriver.ResourceStats{
   410  		Stats:       stats,
   411  		Read:        now,
   412  		MemoryLimit: memoryLimit,
   413  	}, nil
   414  }
   415  
   416  // Update updates configs for a container
   417  func (d *Driver) Update(c *execdriver.Command) error {
   418  	d.Lock()
   419  	cont := d.activeContainers[c.ID]
   420  	d.Unlock()
   421  	if cont == nil {
   422  		return execdriver.ErrNotRunning
   423  	}
   424  	config := cont.Config()
   425  	if err := execdriver.SetupCgroups(&config, c); err != nil {
   426  		return err
   427  	}
   428  
   429  	if err := cont.Set(config); err != nil {
   430  		return err
   431  	}
   432  
   433  	return nil
   434  }
   435  
   436  // TtyConsole implements the exec driver Terminal interface.
   437  type TtyConsole struct {
   438  	console libcontainer.Console
   439  }
   440  
   441  // NewTtyConsole returns a new TtyConsole struct.
   442  func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
   443  	tty := &TtyConsole{
   444  		console: console,
   445  	}
   446  
   447  	if err := tty.AttachPipes(pipes); err != nil {
   448  		tty.Close()
   449  		return nil, err
   450  	}
   451  
   452  	return tty, nil
   453  }
   454  
   455  // Resize implements Resize method of Terminal interface
   456  func (t *TtyConsole) Resize(h, w int) error {
   457  	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
   458  }
   459  
   460  // AttachPipes attaches given pipes to TtyConsole
   461  func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
   462  	go func() {
   463  		if wb, ok := pipes.Stdout.(interface {
   464  			CloseWriters() error
   465  		}); ok {
   466  			defer wb.CloseWriters()
   467  		}
   468  
   469  		pools.Copy(pipes.Stdout, t.console)
   470  	}()
   471  
   472  	if pipes.Stdin != nil {
   473  		go func() {
   474  			pools.Copy(t.console, pipes.Stdin)
   475  
   476  			pipes.Stdin.Close()
   477  		}()
   478  	}
   479  
   480  	return nil
   481  }
   482  
   483  // Close implements Close method of Terminal interface
   484  func (t *TtyConsole) Close() error {
   485  	return t.console.Close()
   486  }
   487  
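        // setupPipes connects the process's stdio to the supplied pipes. TTY
        // containers get a console attached via TtyConsole; without user namespaces
        // the pipes are wired up directly; with a remapped root UID, intermediate OS
        // pipes are created and chowned so the remapped root user can access them.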
   488  func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
   489  
   490  	rootuid, err := container.HostUID()
   491  	if err != nil {
   492  		return err
   493  	}
   494  
   495  	if processConfig.Tty {
   496  		cons, err := p.NewConsole(rootuid)
   497  		if err != nil {
   498  			return err
   499  		}
   500  		term, err := NewTtyConsole(cons, pipes)
   501  		if err != nil {
   502  			return err
   503  		}
   504  		processConfig.Terminal = term
   505  		return nil
   506  	}
   507  	// not a tty--set up stdio pipes
   508  	term := &execdriver.StdConsole{}
   509  	processConfig.Terminal = term
   510  
   511  	// if we are not in a user namespace, there is no reason to go through
   512  	// the hassle of setting up os-level pipes with proper (remapped) ownership,
   513  	// so we keep the direct-assignment shortcut for non-userns containers
   514  	if rootuid == 0 {
   515  		p.Stdout = pipes.Stdout
   516  		p.Stderr = pipes.Stderr
   517  
   518  		r, w, err := os.Pipe()
   519  		if err != nil {
   520  			return err
   521  		}
   522  		if pipes.Stdin != nil {
   523  			go func() {
   524  				io.Copy(w, pipes.Stdin)
   525  				w.Close()
   526  			}()
   527  			p.Stdin = r
   528  		}
   529  		return nil
   530  	}
   531  
   532  	// if we have user namespaces enabled (rootuid != 0), we will set
   533  	// up os pipes for stderr, stdout, stdin so we can chown them to
   534  	// the proper ownership to allow for proper access to the underlying
   535  	// fds
   536  	var fds []int
   537  
   538  	// set up stdout
   539  	r, w, err := os.Pipe()
   540  	if err != nil {
   541  		return err
   542  	}
   543  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   544  	if pipes.Stdout != nil {
   545  		go io.Copy(pipes.Stdout, r)
   546  	}
   547  	term.Closers = append(term.Closers, r)
   548  	p.Stdout = w
   549  
   550  	// set up stderr
   551  	r, w, err = os.Pipe()
   552  	if err != nil {
   553  		return err
   554  	}
   555  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   556  	if pipes.Stderr != nil {
   557  		go io.Copy(pipes.Stderr, r)
   558  	}
   559  	term.Closers = append(term.Closers, r)
   560  	p.Stderr = w
   561  
   562  	// set up stdin
   563  	r, w, err = os.Pipe()
   564  	if err != nil {
   565  		return err
   566  	}
   567  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   568  	if pipes.Stdin != nil {
   569  		go func() {
   570  			io.Copy(w, pipes.Stdin)
   571  			w.Close()
   572  		}()
   573  		p.Stdin = r
   574  	}
   575  	for _, fd := range fds {
   576  		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
   577  			return fmt.Errorf("Failed to chown pipes fd: %v", err)
   578  		}
   579  	}
   580  	return nil
   581  }
   582  
   583  // SupportsHooks implements the execdriver Driver interface.
   584  // The libcontainer/runC-based native execdriver makes use of the hook mechanism.
   585  func (d *Driver) SupportsHooks() bool {
   586  	return true
   587  }