github.com/walkingsparrow/docker@v1.4.2-0.20151218153551-b708a2249bfa/daemon/execdriver/native/driver.go

// +build linux,cgo

package native

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/daemon/execdriver"
	"github.com/docker/docker/daemon/execdriver/native/template"
	"github.com/docker/docker/pkg/parsers"
	"github.com/docker/docker/pkg/pools"
	"github.com/docker/docker/pkg/reexec"
	sysinfo "github.com/docker/docker/pkg/system"
	"github.com/docker/docker/pkg/term"
	"github.com/opencontainers/runc/libcontainer"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/system"
	"github.com/opencontainers/runc/libcontainer/utils"
)

// Define constants for the native driver
const (
	DriverName = "native"
	Version    = "0.2"
)

// Driver contains all information for the native driver;
// it implements execdriver.Driver.
type Driver struct {
	root             string
	activeContainers map[string]libcontainer.Container
	machineMemory    int64
	factory          libcontainer.Factory
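	// The embedded Mutex guards concurrent access to the activeContainers map.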
	sync.Mutex
}

// NewDriver returns a new native driver, called from NewDriver of execdriver.
func NewDriver(root string, options []string) (*Driver, error) {
	meminfo, err := sysinfo.ReadMemInfo()
	if err != nil {
		return nil, err
	}

	if err := sysinfo.MkdirAll(root, 0700); err != nil {
		return nil, err
	}

	if apparmor.IsEnabled() {
		if err := installAppArmorProfile(); err != nil {
			apparmorProfiles := []string{"docker-default"}

			// Allow the daemon to run if loading failed, provided the profiles
			// are already active (possibly loaded by another run, manually, or
			// at system startup)
			for _, policy := range apparmorProfiles {
				if err := hasAppArmorProfileLoaded(policy); err != nil {
					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
				}
			}
		}
	}

	// choose the cgroup manager
	// this makes sure there are no breaking changes for people
	// who upgrade from versions without the native.cgroupdriver opt
	cgm := libcontainer.Cgroupfs

	// parse the options
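	// Illustrative note (an addition, not from the original source): each
	// option is typically supplied on the daemon command line, e.g.
	//
	//	docker daemon --exec-opt native.cgroupdriver=systemd
	//
	// and arrives here as the raw string "native.cgroupdriver=systemd".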
	for _, option := range options {
		key, val, err := parsers.ParseKeyValueOpt(option)
		if err != nil {
			return nil, err
		}
		key = strings.ToLower(key)
		switch key {
		case "native.cgroupdriver":
			// override the default if they set options
			switch val {
			case "systemd":
				if systemd.UseSystemd() {
					cgm = libcontainer.SystemdCgroups
					template.SystemdCgroups = true
				} else {
					// warn them that they chose the wrong driver
					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
				}
			case "cgroupfs":
				cgm = libcontainer.Cgroupfs
			default:
				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. Try cgroupfs or systemd", val)
			}
		default:
			return nil, fmt.Errorf("Unknown option %s", key)
		}
	}

	f, err := libcontainer.New(
		root,
		cgm,
		libcontainer.InitPath(reexec.Self(), DriverName),
	)
	if err != nil {
		return nil, err
	}

	return &Driver{
		root:             root,
		activeContainers: make(map[string]libcontainer.Container),
		machineMemory:    meminfo.MemTotal,
		factory:          f,
	}, nil
}
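
// Usage sketch (illustrative only, not part of the original file): the calling
// execdriver code constructs this driver roughly as
//
//	d, err := NewDriver("/var/run/docker/execdriver/native", []string{"native.cgroupdriver=cgroupfs"})
//	if err != nil {
//		// handle the error
//	}
//
// The root path above is an assumed example value; the real path is chosen by
// the daemon code that calls NewDriver.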

type execOutput struct {
	exitCode int
	err      error
}

// Run implements the exec driver Driver interface;
// it calls libcontainer APIs to run a container.
func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
	destroyed := false
	var err error
	c.TmpDir, err = ioutil.TempDir("", c.ID)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	defer os.RemoveAll(c.TmpDir)

	// take the Command and populate the libcontainer.Config from it
	container, err := d.createContainer(c, hooks)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	p := &libcontainer.Process{
		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
		Env:  c.ProcessConfig.Env,
		Cwd:  c.WorkingDir,
		User: c.ProcessConfig.User,
	}

	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	cont, err := d.factory.Create(c.ID, container)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	d.Lock()
	d.activeContainers[c.ID] = cont
	d.Unlock()
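	// Make sure the libcontainer container is destroyed and the bookkeeping
	// entry removed on every exit path; the destroyed flag avoids a double
	// Destroy on the normal path below.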
	defer func() {
		if !destroyed {
			cont.Destroy()
		}
		d.cleanContainer(c.ID)
	}()

	if err := cont.Start(p); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

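	// Subscribe to OOM notifications before reporting the start to any hooks,
	// so the channel can be handed to hooks.Start and drained once the process
	// has exited.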
	oom := notifyOnOOM(cont)
	if hooks.Start != nil {
		pid, err := p.Pid()
		if err != nil {
			p.Signal(os.Kill)
			p.Wait()
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		hooks.Start(&c.ProcessConfig, pid, oom)
	}

	waitF := p.Wait
	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
		// we need this hack to track processes with inherited fds,
		// because cmd.Wait() waits for all streams to be copied
		waitF = waitInPIDHost(p, cont)
	}
	ps, err := waitF()
	if err != nil {
		execErr, ok := err.(*exec.ExitError)
		if !ok {
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		ps = execErr.ProcessState
	}
	cont.Destroy()
	destroyed = true
	_, oomKill := <-oom
	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}

// notifyOnOOM returns a channel that signals if the container received an OOM notification
// for any process. If it is unable to subscribe to OOM notifications then a closed
// channel is returned as it will be non-blocking and return the correct result when read.
func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
	oom, err := container.NotifyOOM()
	if err != nil {
		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
		c := make(chan struct{})
		close(c)
		return c
	}
	return oom
}

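// killCgroupProcs pauses the container, kills every process still listed in
// its cgroup, resumes it, and then reaps the killed processes. It is used for
// containers that share the host PID namespace, where stray children may be
// left behind.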
func killCgroupProcs(c libcontainer.Container) {
	var procs []*os.Process
	if err := c.Pause(); err != nil {
		logrus.Warn(err)
	}
	pids, err := c.Processes()
	if err != nil {
		// ignore the children if we can't get them; this usually means the cgroup was already deleted
		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
	}
	for _, pid := range pids {
		if p, err := os.FindProcess(pid); err == nil {
			procs = append(procs, p)
			if err := p.Kill(); err != nil {
				logrus.Warn(err)
			}
		}
	}
	if err := c.Resume(); err != nil {
		logrus.Warn(err)
	}
	for _, p := range procs {
		if _, err := p.Wait(); err != nil {
			logrus.Warn(err)
		}
	}
}

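// waitInPIDHost returns a wait function for containers running in the host
// PID namespace: it waits on the init process directly, then kills and reaps
// any processes left in the container's cgroup before returning the process
// state.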
func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
	return func() (*os.ProcessState, error) {
		pid, err := p.Pid()
		if err != nil {
			return nil, err
		}

		process, err := os.FindProcess(pid)
		if err != nil {
			return nil, err
		}
		s, err := process.Wait()
		if err != nil {
			execErr, ok := err.(*exec.ExitError)
			if !ok {
				return s, err
			}
			s = execErr.ProcessState
		}
		killCgroupProcs(c)
		p.Wait()
		return s, err
	}
}

// Kill implements the exec driver Driver interface.
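// It forwards the given signal directly to the container's init process.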
func (d *Driver) Kill(c *execdriver.Command, sig int) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	state, err := active.State()
	if err != nil {
		return err
	}
	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
}

// Pause implements the exec driver Driver interface;
// it calls the libcontainer API to pause a container.
func (d *Driver) Pause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Pause()
}

// Unpause implements the exec driver Driver interface;
// it calls the libcontainer API to unpause a container.
func (d *Driver) Unpause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Resume()
}

// Terminate implements the exec driver Driver interface.
func (d *Driver) Terminate(c *execdriver.Command) error {
	defer d.cleanContainer(c.ID)
	container, err := d.factory.Load(c.ID)
	if err != nil {
		return err
	}
	defer container.Destroy()
	state, err := container.State()
	if err != nil {
		return err
	}
	pid := state.InitProcessPid
	currentStartTime, err := system.GetProcessStartTime(pid)
	if err != nil {
		return err
	}
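	// Only kill the process if its start time still matches the recorded one,
	// so we never signal an unrelated process that happened to reuse the PID.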
	if state.InitProcessStartTime == currentStartTime {
		err = syscall.Kill(pid, syscall.SIGKILL)
		syscall.Wait4(pid, nil, 0, nil)
	}
	return err
}

// Info implements the exec driver Driver interface.
func (d *Driver) Info(id string) execdriver.Info {
	return &info{
		ID:     id,
		driver: d,
	}
}

// Name implements the exec driver Driver interface.
func (d *Driver) Name() string {
	return fmt.Sprintf("%s-%s", DriverName, Version)
}

// GetPidsForContainer implements the exec driver Driver interface.
func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
	d.Lock()
	active := d.activeContainers[id]
	d.Unlock()

	if active == nil {
		return nil, fmt.Errorf("active container for %s does not exist", id)
	}
	return active.Processes()
}

func (d *Driver) cleanContainer(id string) error {
	d.Lock()
	delete(d.activeContainers, id)
	d.Unlock()
	return os.RemoveAll(filepath.Join(d.root, id))
}

func (d *Driver) createContainerRoot(id string) error {
	return os.MkdirAll(filepath.Join(d.root, id), 0655)
}

// Clean implements the exec driver Driver interface.
func (d *Driver) Clean(id string) error {
	return os.RemoveAll(filepath.Join(d.root, id))
}

// Stats implements the exec driver Driver interface.
func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
	d.Lock()
	c := d.activeContainers[id]
	d.Unlock()
	if c == nil {
		return nil, execdriver.ErrNotRunning
	}
	now := time.Now()
	stats, err := c.Stats()
	if err != nil {
		return nil, err
	}
	memoryLimit := c.Config().Cgroups.Memory
	// if the container does not have a memory limit specified, set the
	// limit to the machine's total memory
	if memoryLimit == 0 {
		memoryLimit = d.machineMemory
	}
	return &execdriver.ResourceStats{
		Stats:       stats,
		Read:        now,
		MemoryLimit: memoryLimit,
	}, nil
}

// TtyConsole implements the exec driver Terminal interface.
type TtyConsole struct {
	console libcontainer.Console
}

// NewTtyConsole returns a new TtyConsole struct.
func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
	tty := &TtyConsole{
		console: console,
	}

	if err := tty.AttachPipes(pipes); err != nil {
		tty.Close()
		return nil, err
	}

	return tty, nil
}

// Resize implements the Resize method of the Terminal interface.
func (t *TtyConsole) Resize(h, w int) error {
	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}

// AttachPipes attaches the given pipes to the TtyConsole.
func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
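	// Copy console output to stdout in the background; if the stdout writer
	// also implements CloseWriters, close it once the copy finishes so readers
	// see EOF.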
	go func() {
		if wb, ok := pipes.Stdout.(interface {
			CloseWriters() error
		}); ok {
			defer wb.CloseWriters()
		}

		pools.Copy(pipes.Stdout, t.console)
	}()

	if pipes.Stdin != nil {
		go func() {
			pools.Copy(t.console, pipes.Stdin)

			pipes.Stdin.Close()
		}()
	}

	return nil
}

// Close implements the Close method of the Terminal interface.
func (t *TtyConsole) Close() error {
	return t.console.Close()
}

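// setupPipes wires the container process's stdio to the driver's pipes. For a
// tty it allocates a console; otherwise it uses plain stdio, falling back to
// OS-level pipes chowned to the remapped root uid when user namespaces are in
// use.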
func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {

	rootuid, err := container.HostUID()
	if err != nil {
		return err
	}

	if processConfig.Tty {
		cons, err := p.NewConsole(rootuid)
		if err != nil {
			return err
		}
		term, err := NewTtyConsole(cons, pipes)
		if err != nil {
			return err
		}
		processConfig.Terminal = term
		return nil
	}
	// not a tty: set up stdio pipes
	term := &execdriver.StdConsole{}
	processConfig.Terminal = term

	// if we are not in a user namespace, there is no reason to go through
	// the hassle of setting up OS-level pipes with proper (remapped) ownership,
	// so we take the simple shortcut for non-userns containers
	if rootuid == 0 {
		p.Stdout = pipes.Stdout
		p.Stderr = pipes.Stderr

		r, w, err := os.Pipe()
		if err != nil {
			return err
		}
		if pipes.Stdin != nil {
			go func() {
				io.Copy(w, pipes.Stdin)
				w.Close()
			}()
			p.Stdin = r
		}
		return nil
	}

	// if we have user namespaces enabled (rootuid != 0), we will set
	// up OS pipes for stderr, stdout, and stdin so we can chown them to
	// the proper ownership to allow for proper access to the underlying
	// fds
	var fds []int

	// set up stdout
	r, w, err := os.Pipe()
	if err != nil {
		return err
	}
	fds = append(fds, int(r.Fd()), int(w.Fd()))
	if pipes.Stdout != nil {
		go io.Copy(pipes.Stdout, r)
	}
	term.Closers = append(term.Closers, r)
	p.Stdout = w

	// set up stderr
	r, w, err = os.Pipe()
	if err != nil {
		return err
	}
	fds = append(fds, int(r.Fd()), int(w.Fd()))
	if pipes.Stderr != nil {
		go io.Copy(pipes.Stderr, r)
	}
	term.Closers = append(term.Closers, r)
	p.Stderr = w

	// set up stdin
	r, w, err = os.Pipe()
	if err != nil {
		return err
	}
	fds = append(fds, int(r.Fd()), int(w.Fd()))
	if pipes.Stdin != nil {
		go func() {
			io.Copy(w, pipes.Stdin)
			w.Close()
		}()
		p.Stdin = r
	}
	for _, fd := range fds {
		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
			return fmt.Errorf("Failed to chown pipes fd: %v", err)
		}
	}
	return nil
}

// SupportsHooks implements the execdriver Driver interface.
// The libcontainer/runC-based native execdriver makes use of the hook mechanism.
func (d *Driver) SupportsHooks() bool {
	return true
}