github.com/ncdc/docker@v0.10.1-0.20160129113957-6c6729ef5b74/daemon/execdriver/native/driver.go (about)

     1  // +build linux,cgo
     2  
     3  package native
     4  
     5  import (
     6  	"fmt"
     7  	"io"
     8  	"io/ioutil"
     9  	"os"
    10  	"os/exec"
    11  	"path/filepath"
    12  	"strings"
    13  	"sync"
    14  	"syscall"
    15  	"time"
    16  
    17  	"github.com/Sirupsen/logrus"
    18  	"github.com/docker/docker/daemon/execdriver"
    19  	"github.com/docker/docker/pkg/parsers"
    20  	"github.com/docker/docker/pkg/pools"
    21  	"github.com/docker/docker/pkg/reexec"
    22  	sysinfo "github.com/docker/docker/pkg/system"
    23  	"github.com/docker/docker/pkg/term"
    24  	aaprofile "github.com/docker/docker/profiles/apparmor"
    25  	"github.com/opencontainers/runc/libcontainer"
    26  	"github.com/opencontainers/runc/libcontainer/apparmor"
    27  	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
    28  	"github.com/opencontainers/runc/libcontainer/configs"
    29  	"github.com/opencontainers/runc/libcontainer/system"
    30  	"github.com/opencontainers/runc/libcontainer/utils"
    31  )
    32  
    33  // Define constants for native driver
    34  const (
    35  	DriverName = "native"
    36  	Version    = "0.2"
    37  
    38  	defaultApparmorProfile = "docker-default"
    39  )
    40  
// Driver contains all information for native driver,
// it implements execdriver.Driver.
//
// The embedded sync.Mutex guards activeContainers; every method in this file
// that reads or writes the map does so between Lock and Unlock.
type Driver struct {
	// root is the base directory handed to the libcontainer factory; each
	// container's state directory is filepath.Join(root, id).
	root             string
	// activeContainers maps a container ID to the libcontainer container
	// created for it by Run. Entries are removed by cleanContainer.
	activeContainers map[string]libcontainer.Container
	// machineMemory is the MemTotal value read once in NewDriver; Stats
	// reports it as the limit for containers with no memory limit set.
	machineMemory    int64
	// factory creates (Run) and loads (Terminate) libcontainer containers.
	factory          libcontainer.Factory
	sync.Mutex
}
    50  
    51  // NewDriver returns a new native driver, called from NewDriver of execdriver.
    52  func NewDriver(root string, options []string) (*Driver, error) {
    53  	meminfo, err := sysinfo.ReadMemInfo()
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  
    58  	if err := sysinfo.MkdirAll(root, 0700); err != nil {
    59  		return nil, err
    60  	}
    61  
    62  	if apparmor.IsEnabled() {
    63  		if err := aaprofile.InstallDefault(defaultApparmorProfile); err != nil {
    64  			apparmorProfiles := []string{defaultApparmorProfile}
    65  
    66  			// Allow daemon to run if loading failed, but are active
    67  			// (possibly through another run, manually, or via system startup)
    68  			for _, policy := range apparmorProfiles {
    69  				if err := aaprofile.IsLoaded(policy); err != nil {
    70  					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
    71  				}
    72  			}
    73  		}
    74  	}
    75  
    76  	// choose cgroup manager
    77  	// this makes sure there are no breaking changes to people
    78  	// who upgrade from versions without native.cgroupdriver opt
    79  	cgm := libcontainer.Cgroupfs
    80  
    81  	// parse the options
    82  	for _, option := range options {
    83  		key, val, err := parsers.ParseKeyValueOpt(option)
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  		key = strings.ToLower(key)
    88  		switch key {
    89  		case "native.cgroupdriver":
    90  			// override the default if they set options
    91  			switch val {
    92  			case "systemd":
    93  				if systemd.UseSystemd() {
    94  					cgm = libcontainer.SystemdCgroups
    95  				} else {
    96  					// warn them that they chose the wrong driver
    97  					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
    98  				}
    99  			case "cgroupfs":
   100  				cgm = libcontainer.Cgroupfs
   101  			default:
   102  				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
   103  			}
   104  		default:
   105  			return nil, fmt.Errorf("Unknown option %s\n", key)
   106  		}
   107  	}
   108  
   109  	f, err := libcontainer.New(
   110  		root,
   111  		cgm,
   112  		libcontainer.InitPath(reexec.Self(), DriverName),
   113  	)
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	return &Driver{
   119  		root:             root,
   120  		activeContainers: make(map[string]libcontainer.Container),
   121  		machineMemory:    meminfo.MemTotal,
   122  		factory:          f,
   123  	}, nil
   124  }
   125  
// execOutput pairs an exit code with an error.
// NOTE(review): no use of this type is visible in this file; presumably it
// is consumed by the exec path elsewhere in the package — confirm.
type execOutput struct {
	exitCode int
	err      error
}
   130  
// Run implements the exec driver Driver interface,
// it calls libcontainer APIs to run a container.
//
// Run blocks until the container's process has been waited for. It creates a
// temporary directory (stored in c.TmpDir and removed on return), builds the
// libcontainer config from c, wires stdio via setupPipes, creates and starts
// the container, and registers it in d.activeContainers for the duration of
// the call. If hooks.Start is set it is invoked with the process config, the
// pid and an OOM notification channel once the process has started.
//
// On any failure before the process has been waited for, the returned
// ExitStatus has ExitCode -1 and the error is non-nil. Otherwise the exit
// code is derived from the process wait status and OOMKilled reports whether
// an OOM notification was received for the container.
func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
	// destroyed records that cont.Destroy() already ran on the normal path so
	// the deferred cleanup below does not destroy the container twice.
	destroyed := false
	var err error
	c.TmpDir, err = ioutil.TempDir("", c.ID)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	defer os.RemoveAll(c.TmpDir)

	// take the Command and populate the libcontainer.Config from it
	container, err := d.createContainer(c, hooks)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	p := &libcontainer.Process{
		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
		Env:  c.ProcessConfig.Env,
		Cwd:  c.WorkingDir,
		User: c.ProcessConfig.User,
	}

	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	cont, err := d.factory.Create(c.ID, container)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	d.Lock()
	d.activeContainers[c.ID] = cont
	d.Unlock()
	// From here on every return path must unregister the container and remove
	// its state directory; the container itself is destroyed here unless the
	// normal path below already did so.
	defer func() {
		if !destroyed {
			cont.Destroy()
		}
		d.cleanContainer(c.ID)
	}()

	if err := cont.Start(p); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	// 'oom' is used to emit 'oom' events to the eventstream, 'oomKilled' is used
	// to set the 'OOMKilled' flag in state
	oom := notifyOnOOM(cont)
	oomKilled := notifyOnOOM(cont)
	if hooks.Start != nil {
		pid, err := p.Pid()
		if err != nil {
			// Without a pid the start hook cannot run: kill the process and
			// reap it before reporting the failure.
			p.Signal(os.Kill)
			p.Wait()
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		hooks.Start(&c.ProcessConfig, pid, oom)
	}

	waitF := p.Wait
	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
		// we need such hack for tracking processes with inherited fds,
		// because cmd.Wait() waiting for all streams to be copied
		waitF = waitInPIDHost(p, cont)
	}
	ps, err := waitF()
	if err != nil {
		// An *exec.ExitError still carries a usable process state; any other
		// error is reported as a failed run.
		execErr, ok := err.(*exec.ExitError)
		if !ok {
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		ps = execErr.ProcessState
	}
	cont.Destroy()
	destroyed = true
	// oomKilled will have an oom event if any process within the container was
	// OOM killed at any time, not only if the init process OOMed.
	//
	// Perhaps we only want the OOMKilled flag to be set if the OOM
	// resulted in a container death, but there isn't a good way to do this
	// because the kernel's cgroup oom notification does not provide information
	// such as the PID. This could be heuristically done by checking that the OOM
	// happened within some very small time slice for the container dying (and
	// optionally exit-code 137), but I don't think the cgroup oom notification
	// can be used to reliably determine this
	//
	// Even if there were multiple OOMs, it's sufficient to read one value
	// because libcontainer's oom notify will discard the channel after the
	// cgroup is destroyed
	//
	// oomKill is the receive's "ok" result: true when a value was delivered,
	// false when the channel was already closed.
	_, oomKill := <-oomKilled
	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}
   224  
   225  // notifyOnOOM returns a channel that signals if the container received an OOM notification
   226  // for any process. If it is unable to subscribe to OOM notifications then a closed
   227  // channel is returned as it will be non-blocking and return the correct result when read.
   228  func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
   229  	oom, err := container.NotifyOOM()
   230  	if err != nil {
   231  		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
   232  		c := make(chan struct{})
   233  		close(c)
   234  		return c
   235  	}
   236  	return oom
   237  }
   238  
   239  func killCgroupProcs(c libcontainer.Container) {
   240  	var procs []*os.Process
   241  	if err := c.Pause(); err != nil {
   242  		logrus.Warn(err)
   243  	}
   244  	pids, err := c.Processes()
   245  	if err != nil {
   246  		// don't care about childs if we can't get them, this is mostly because cgroup already deleted
   247  		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
   248  	}
   249  	for _, pid := range pids {
   250  		if p, err := os.FindProcess(pid); err == nil {
   251  			procs = append(procs, p)
   252  			if err := p.Kill(); err != nil {
   253  				logrus.Warn(err)
   254  			}
   255  		}
   256  	}
   257  	if err := c.Resume(); err != nil {
   258  		logrus.Warn(err)
   259  	}
   260  	for _, p := range procs {
   261  		if _, err := p.Wait(); err != nil {
   262  			logrus.Warn(err)
   263  		}
   264  	}
   265  }
   266  
   267  func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
   268  	return func() (*os.ProcessState, error) {
   269  		pid, err := p.Pid()
   270  		if err != nil {
   271  			return nil, err
   272  		}
   273  
   274  		process, err := os.FindProcess(pid)
   275  		s, err := process.Wait()
   276  		if err != nil {
   277  			execErr, ok := err.(*exec.ExitError)
   278  			if !ok {
   279  				return s, err
   280  			}
   281  			s = execErr.ProcessState
   282  		}
   283  		killCgroupProcs(c)
   284  		p.Wait()
   285  		return s, err
   286  	}
   287  }
   288  
   289  // Kill implements the exec driver Driver interface.
   290  func (d *Driver) Kill(c *execdriver.Command, sig int) error {
   291  	d.Lock()
   292  	active := d.activeContainers[c.ID]
   293  	d.Unlock()
   294  	if active == nil {
   295  		return fmt.Errorf("active container for %s does not exist", c.ID)
   296  	}
   297  	state, err := active.State()
   298  	if err != nil {
   299  		return err
   300  	}
   301  	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
   302  }
   303  
   304  // Pause implements the exec driver Driver interface,
   305  // it calls libcontainer API to pause a container.
   306  func (d *Driver) Pause(c *execdriver.Command) error {
   307  	d.Lock()
   308  	active := d.activeContainers[c.ID]
   309  	d.Unlock()
   310  	if active == nil {
   311  		return fmt.Errorf("active container for %s does not exist", c.ID)
   312  	}
   313  	return active.Pause()
   314  }
   315  
   316  // Unpause implements the exec driver Driver interface,
   317  // it calls libcontainer API to unpause a container.
   318  func (d *Driver) Unpause(c *execdriver.Command) error {
   319  	d.Lock()
   320  	active := d.activeContainers[c.ID]
   321  	d.Unlock()
   322  	if active == nil {
   323  		return fmt.Errorf("active container for %s does not exist", c.ID)
   324  	}
   325  	return active.Resume()
   326  }
   327  
   328  // Terminate implements the exec driver Driver interface.
   329  func (d *Driver) Terminate(c *execdriver.Command) error {
   330  	defer d.cleanContainer(c.ID)
   331  	container, err := d.factory.Load(c.ID)
   332  	if err != nil {
   333  		return err
   334  	}
   335  	defer container.Destroy()
   336  	state, err := container.State()
   337  	if err != nil {
   338  		return err
   339  	}
   340  	pid := state.InitProcessPid
   341  	currentStartTime, err := system.GetProcessStartTime(pid)
   342  	if err != nil {
   343  		return err
   344  	}
   345  	if state.InitProcessStartTime == currentStartTime {
   346  		err = syscall.Kill(pid, 9)
   347  		syscall.Wait4(pid, nil, 0, nil)
   348  	}
   349  	return err
   350  }
   351  
   352  // Info implements the exec driver Driver interface.
   353  func (d *Driver) Info(id string) execdriver.Info {
   354  	return &info{
   355  		ID:     id,
   356  		driver: d,
   357  	}
   358  }
   359  
   360  // Name implements the exec driver Driver interface.
   361  func (d *Driver) Name() string {
   362  	return fmt.Sprintf("%s-%s", DriverName, Version)
   363  }
   364  
   365  // GetPidsForContainer implements the exec driver Driver interface.
   366  func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
   367  	d.Lock()
   368  	active := d.activeContainers[id]
   369  	d.Unlock()
   370  
   371  	if active == nil {
   372  		return nil, fmt.Errorf("active container for %s does not exist", id)
   373  	}
   374  	return active.Processes()
   375  }
   376  
   377  func (d *Driver) cleanContainer(id string) error {
   378  	d.Lock()
   379  	delete(d.activeContainers, id)
   380  	d.Unlock()
   381  	return os.RemoveAll(filepath.Join(d.root, id))
   382  }
   383  
   384  func (d *Driver) createContainerRoot(id string) error {
   385  	return os.MkdirAll(filepath.Join(d.root, id), 0655)
   386  }
   387  
   388  // Clean implements the exec driver Driver interface.
   389  func (d *Driver) Clean(id string) error {
   390  	return os.RemoveAll(filepath.Join(d.root, id))
   391  }
   392  
   393  // Stats implements the exec driver Driver interface.
   394  func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
   395  	d.Lock()
   396  	c := d.activeContainers[id]
   397  	d.Unlock()
   398  	if c == nil {
   399  		return nil, execdriver.ErrNotRunning
   400  	}
   401  	now := time.Now()
   402  	stats, err := c.Stats()
   403  	if err != nil {
   404  		return nil, err
   405  	}
   406  	memoryLimit := c.Config().Cgroups.Resources.Memory
   407  	// if the container does not have any memory limit specified set the
   408  	// limit to the machines memory
   409  	if memoryLimit == 0 {
   410  		memoryLimit = d.machineMemory
   411  	}
   412  	return &execdriver.ResourceStats{
   413  		Stats:       stats,
   414  		Read:        now,
   415  		MemoryLimit: memoryLimit,
   416  	}, nil
   417  }
   418  
   419  // Update updates configs for a container
   420  func (d *Driver) Update(c *execdriver.Command) error {
   421  	d.Lock()
   422  	cont := d.activeContainers[c.ID]
   423  	d.Unlock()
   424  	if cont == nil {
   425  		return execdriver.ErrNotRunning
   426  	}
   427  	config := cont.Config()
   428  	if err := execdriver.SetupCgroups(&config, c); err != nil {
   429  		return err
   430  	}
   431  
   432  	if err := cont.Set(config); err != nil {
   433  		return err
   434  	}
   435  
   436  	return nil
   437  }
   438  
// TtyConsole implements the exec driver Terminal interface.
// It wraps a libcontainer console: Resize sets the window size on the
// console's file descriptor and Close closes the console.
type TtyConsole struct {
	// console is the libcontainer console that AttachPipes copies to and from.
	console libcontainer.Console
}
   443  
   444  // NewTtyConsole returns a new TtyConsole struct.
   445  func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
   446  	tty := &TtyConsole{
   447  		console: console,
   448  	}
   449  
   450  	if err := tty.AttachPipes(pipes); err != nil {
   451  		tty.Close()
   452  		return nil, err
   453  	}
   454  
   455  	return tty, nil
   456  }
   457  
   458  // Resize implements Resize method of Terminal interface
   459  func (t *TtyConsole) Resize(h, w int) error {
   460  	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
   461  }
   462  
   463  // AttachPipes attaches given pipes to TtyConsole
   464  func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
   465  	go func() {
   466  		if wb, ok := pipes.Stdout.(interface {
   467  			CloseWriters() error
   468  		}); ok {
   469  			defer wb.CloseWriters()
   470  		}
   471  
   472  		pools.Copy(pipes.Stdout, t.console)
   473  	}()
   474  
   475  	if pipes.Stdin != nil {
   476  		go func() {
   477  			pools.Copy(t.console, pipes.Stdin)
   478  
   479  			pipes.Stdin.Close()
   480  		}()
   481  	}
   482  
   483  	return nil
   484  }
   485  
   486  // Close implements Close method of Terminal interface
   487  func (t *TtyConsole) Close() error {
   488  	return t.console.Close()
   489  }
   490  
   491  func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
   492  
   493  	rootuid, err := container.HostUID()
   494  	if err != nil {
   495  		return err
   496  	}
   497  
   498  	if processConfig.Tty {
   499  		cons, err := p.NewConsole(rootuid)
   500  		if err != nil {
   501  			return err
   502  		}
   503  		term, err := NewTtyConsole(cons, pipes)
   504  		if err != nil {
   505  			return err
   506  		}
   507  		processConfig.Terminal = term
   508  		return nil
   509  	}
   510  	// not a tty--set up stdio pipes
   511  	term := &execdriver.StdConsole{}
   512  	processConfig.Terminal = term
   513  
   514  	// if we are not in a user namespace, there is no reason to go through
   515  	// the hassle of setting up os-level pipes with proper (remapped) ownership
   516  	// so we will do the prior shortcut for non-userns containers
   517  	if rootuid == 0 {
   518  		p.Stdout = pipes.Stdout
   519  		p.Stderr = pipes.Stderr
   520  
   521  		r, w, err := os.Pipe()
   522  		if err != nil {
   523  			return err
   524  		}
   525  		if pipes.Stdin != nil {
   526  			go func() {
   527  				io.Copy(w, pipes.Stdin)
   528  				w.Close()
   529  			}()
   530  			p.Stdin = r
   531  		}
   532  		return nil
   533  	}
   534  
   535  	// if we have user namespaces enabled (rootuid != 0), we will set
   536  	// up os pipes for stderr, stdout, stdin so we can chown them to
   537  	// the proper ownership to allow for proper access to the underlying
   538  	// fds
   539  	var fds []int
   540  
   541  	//setup stdout
   542  	r, w, err := os.Pipe()
   543  	if err != nil {
   544  		return err
   545  	}
   546  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   547  	if pipes.Stdout != nil {
   548  		go io.Copy(pipes.Stdout, r)
   549  	}
   550  	term.Closers = append(term.Closers, r)
   551  	p.Stdout = w
   552  
   553  	//setup stderr
   554  	r, w, err = os.Pipe()
   555  	if err != nil {
   556  		return err
   557  	}
   558  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   559  	if pipes.Stderr != nil {
   560  		go io.Copy(pipes.Stderr, r)
   561  	}
   562  	term.Closers = append(term.Closers, r)
   563  	p.Stderr = w
   564  
   565  	//setup stdin
   566  	r, w, err = os.Pipe()
   567  	if err != nil {
   568  		return err
   569  	}
   570  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   571  	if pipes.Stdin != nil {
   572  		go func() {
   573  			io.Copy(w, pipes.Stdin)
   574  			w.Close()
   575  		}()
   576  		p.Stdin = r
   577  	}
   578  	for _, fd := range fds {
   579  		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
   580  			return fmt.Errorf("Failed to chown pipes fd: %v", err)
   581  		}
   582  	}
   583  	return nil
   584  }
   585  
// SupportsHooks implements the execdriver Driver interface.
// It always returns true: the libcontainer/runC-based native execdriver
// makes use of the hook mechanism.
func (d *Driver) SupportsHooks() bool {
	return true
}