github.com/tompao/docker@v1.9.1/daemon/execdriver/native/driver.go (about)

     1  // +build linux,cgo
     2  
     3  package native
     4  
     5  import (
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"strings"
    12  	"sync"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/Sirupsen/logrus"
    17  	"github.com/docker/docker/daemon/execdriver"
    18  	"github.com/docker/docker/pkg/parsers"
    19  	"github.com/docker/docker/pkg/pools"
    20  	"github.com/docker/docker/pkg/reexec"
    21  	sysinfo "github.com/docker/docker/pkg/system"
    22  	"github.com/docker/docker/pkg/term"
    23  	"github.com/opencontainers/runc/libcontainer"
    24  	"github.com/opencontainers/runc/libcontainer/apparmor"
    25  	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
    26  	"github.com/opencontainers/runc/libcontainer/configs"
    27  	"github.com/opencontainers/runc/libcontainer/system"
    28  	"github.com/opencontainers/runc/libcontainer/utils"
    29  )
    30  
    31  // Define constants for native driver
    32  const (
    33  	DriverName = "native"
    34  	Version    = "0.2"
    35  )
    36  
    37  // Driver contains all information for native driver,
    38  // it implements execdriver.Driver.
    39  type Driver struct {
    40  	root             string
    41  	initPath         string
    42  	activeContainers map[string]libcontainer.Container
    43  	machineMemory    int64
    44  	factory          libcontainer.Factory
    45  	sync.Mutex
    46  }
    47  
    48  // NewDriver returns a new native driver, called from NewDriver of execdriver.
    49  func NewDriver(root, initPath string, options []string) (*Driver, error) {
    50  	meminfo, err := sysinfo.ReadMemInfo()
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  
    55  	if err := sysinfo.MkdirAll(root, 0700); err != nil {
    56  		return nil, err
    57  	}
    58  
    59  	if apparmor.IsEnabled() {
    60  		if err := installAppArmorProfile(); err != nil {
    61  			apparmorProfiles := []string{"docker-default"}
    62  
    63  			// Allow daemon to run if loading failed, but are active
    64  			// (possibly through another run, manually, or via system startup)
    65  			for _, policy := range apparmorProfiles {
    66  				if err := hasAppArmorProfileLoaded(policy); err != nil {
    67  					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
    68  				}
    69  			}
    70  		}
    71  	}
    72  
    73  	// choose cgroup manager
    74  	// this makes sure there are no breaking changes to people
    75  	// who upgrade from versions without native.cgroupdriver opt
    76  	cgm := libcontainer.Cgroupfs
    77  	if systemd.UseSystemd() {
    78  		cgm = libcontainer.SystemdCgroups
    79  	}
    80  
    81  	// parse the options
    82  	for _, option := range options {
    83  		key, val, err := parsers.ParseKeyValueOpt(option)
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  		key = strings.ToLower(key)
    88  		switch key {
    89  		case "native.cgroupdriver":
    90  			// override the default if they set options
    91  			switch val {
    92  			case "systemd":
    93  				if systemd.UseSystemd() {
    94  					cgm = libcontainer.SystemdCgroups
    95  				} else {
    96  					// warn them that they chose the wrong driver
    97  					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
    98  				}
    99  			case "cgroupfs":
   100  				cgm = libcontainer.Cgroupfs
   101  			default:
   102  				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
   103  			}
   104  		default:
   105  			return nil, fmt.Errorf("Unknown option %s\n", key)
   106  		}
   107  	}
   108  
   109  	f, err := libcontainer.New(
   110  		root,
   111  		cgm,
   112  		libcontainer.InitPath(reexec.Self(), DriverName),
   113  	)
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	return &Driver{
   119  		root:             root,
   120  		initPath:         initPath,
   121  		activeContainers: make(map[string]libcontainer.Container),
   122  		machineMemory:    meminfo.MemTotal,
   123  		factory:          f,
   124  	}, nil
   125  }
   126  
   127  type execOutput struct {
   128  	exitCode int
   129  	err      error
   130  }
   131  
   132  // Run implements the exec driver Driver interface,
   133  // it calls libcontainer APIs to run a container.
   134  func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
   135  	// take the Command and populate the libcontainer.Config from it
   136  	container, err := d.createContainer(c, hooks)
   137  	if err != nil {
   138  		return execdriver.ExitStatus{ExitCode: -1}, err
   139  	}
   140  
   141  	p := &libcontainer.Process{
   142  		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
   143  		Env:  c.ProcessConfig.Env,
   144  		Cwd:  c.WorkingDir,
   145  		User: c.ProcessConfig.User,
   146  	}
   147  
   148  	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
   149  		return execdriver.ExitStatus{ExitCode: -1}, err
   150  	}
   151  
   152  	cont, err := d.factory.Create(c.ID, container)
   153  	if err != nil {
   154  		return execdriver.ExitStatus{ExitCode: -1}, err
   155  	}
   156  	d.Lock()
   157  	d.activeContainers[c.ID] = cont
   158  	d.Unlock()
   159  	defer func() {
   160  		cont.Destroy()
   161  		d.cleanContainer(c.ID)
   162  	}()
   163  
   164  	if err := cont.Start(p); err != nil {
   165  		return execdriver.ExitStatus{ExitCode: -1}, err
   166  	}
   167  
   168  	oom := notifyOnOOM(cont)
   169  	if hooks.Start != nil {
   170  		pid, err := p.Pid()
   171  		if err != nil {
   172  			p.Signal(os.Kill)
   173  			p.Wait()
   174  			return execdriver.ExitStatus{ExitCode: -1}, err
   175  		}
   176  		hooks.Start(&c.ProcessConfig, pid, oom)
   177  	}
   178  
   179  	waitF := p.Wait
   180  	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
   181  		// we need such hack for tracking processes with inherited fds,
   182  		// because cmd.Wait() waiting for all streams to be copied
   183  		waitF = waitInPIDHost(p, cont)
   184  	}
   185  	ps, err := waitF()
   186  	if err != nil {
   187  		execErr, ok := err.(*exec.ExitError)
   188  		if !ok {
   189  			return execdriver.ExitStatus{ExitCode: -1}, err
   190  		}
   191  		ps = execErr.ProcessState
   192  	}
   193  	cont.Destroy()
   194  	_, oomKill := <-oom
   195  	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
   196  }
   197  
   198  // notifyOnOOM returns a channel that signals if the container received an OOM notification
   199  // for any process. If it is unable to subscribe to OOM notifications then a closed
   200  // channel is returned as it will be non-blocking and return the correct result when read.
   201  func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
   202  	oom, err := container.NotifyOOM()
   203  	if err != nil {
   204  		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
   205  		c := make(chan struct{})
   206  		close(c)
   207  		return c
   208  	}
   209  	return oom
   210  }
   211  
   212  func killCgroupProcs(c libcontainer.Container) {
   213  	var procs []*os.Process
   214  	if err := c.Pause(); err != nil {
   215  		logrus.Warn(err)
   216  	}
   217  	pids, err := c.Processes()
   218  	if err != nil {
   219  		// don't care about childs if we can't get them, this is mostly because cgroup already deleted
   220  		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
   221  	}
   222  	for _, pid := range pids {
   223  		if p, err := os.FindProcess(pid); err == nil {
   224  			procs = append(procs, p)
   225  			if err := p.Kill(); err != nil {
   226  				logrus.Warn(err)
   227  			}
   228  		}
   229  	}
   230  	if err := c.Resume(); err != nil {
   231  		logrus.Warn(err)
   232  	}
   233  	for _, p := range procs {
   234  		if _, err := p.Wait(); err != nil {
   235  			logrus.Warn(err)
   236  		}
   237  	}
   238  }
   239  
   240  func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
   241  	return func() (*os.ProcessState, error) {
   242  		pid, err := p.Pid()
   243  		if err != nil {
   244  			return nil, err
   245  		}
   246  
   247  		process, err := os.FindProcess(pid)
   248  		s, err := process.Wait()
   249  		if err != nil {
   250  			execErr, ok := err.(*exec.ExitError)
   251  			if !ok {
   252  				return s, err
   253  			}
   254  			s = execErr.ProcessState
   255  		}
   256  		killCgroupProcs(c)
   257  		p.Wait()
   258  		return s, err
   259  	}
   260  }
   261  
   262  // Kill implements the exec driver Driver interface.
   263  func (d *Driver) Kill(c *execdriver.Command, sig int) error {
   264  	d.Lock()
   265  	active := d.activeContainers[c.ID]
   266  	d.Unlock()
   267  	if active == nil {
   268  		return fmt.Errorf("active container for %s does not exist", c.ID)
   269  	}
   270  	state, err := active.State()
   271  	if err != nil {
   272  		return err
   273  	}
   274  	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
   275  }
   276  
   277  // Pause implements the exec driver Driver interface,
   278  // it calls libcontainer API to pause a container.
   279  func (d *Driver) Pause(c *execdriver.Command) error {
   280  	d.Lock()
   281  	active := d.activeContainers[c.ID]
   282  	d.Unlock()
   283  	if active == nil {
   284  		return fmt.Errorf("active container for %s does not exist", c.ID)
   285  	}
   286  	return active.Pause()
   287  }
   288  
   289  // Unpause implements the exec driver Driver interface,
   290  // it calls libcontainer API to unpause a container.
   291  func (d *Driver) Unpause(c *execdriver.Command) error {
   292  	d.Lock()
   293  	active := d.activeContainers[c.ID]
   294  	d.Unlock()
   295  	if active == nil {
   296  		return fmt.Errorf("active container for %s does not exist", c.ID)
   297  	}
   298  	return active.Resume()
   299  }
   300  
   301  // Terminate implements the exec driver Driver interface.
   302  func (d *Driver) Terminate(c *execdriver.Command) error {
   303  	defer d.cleanContainer(c.ID)
   304  	container, err := d.factory.Load(c.ID)
   305  	if err != nil {
   306  		return err
   307  	}
   308  	defer container.Destroy()
   309  	state, err := container.State()
   310  	if err != nil {
   311  		return err
   312  	}
   313  	pid := state.InitProcessPid
   314  	currentStartTime, err := system.GetProcessStartTime(pid)
   315  	if err != nil {
   316  		return err
   317  	}
   318  	if state.InitProcessStartTime == currentStartTime {
   319  		err = syscall.Kill(pid, 9)
   320  		syscall.Wait4(pid, nil, 0, nil)
   321  	}
   322  	return err
   323  }
   324  
   325  // Info implements the exec driver Driver interface.
   326  func (d *Driver) Info(id string) execdriver.Info {
   327  	return &info{
   328  		ID:     id,
   329  		driver: d,
   330  	}
   331  }
   332  
   333  // Name implements the exec driver Driver interface.
   334  func (d *Driver) Name() string {
   335  	return fmt.Sprintf("%s-%s", DriverName, Version)
   336  }
   337  
   338  // GetPidsForContainer implements the exec driver Driver interface.
   339  func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
   340  	d.Lock()
   341  	active := d.activeContainers[id]
   342  	d.Unlock()
   343  
   344  	if active == nil {
   345  		return nil, fmt.Errorf("active container for %s does not exist", id)
   346  	}
   347  	return active.Processes()
   348  }
   349  
   350  func (d *Driver) cleanContainer(id string) error {
   351  	d.Lock()
   352  	delete(d.activeContainers, id)
   353  	d.Unlock()
   354  	return os.RemoveAll(filepath.Join(d.root, id))
   355  }
   356  
   357  func (d *Driver) createContainerRoot(id string) error {
   358  	return os.MkdirAll(filepath.Join(d.root, id), 0655)
   359  }
   360  
   361  // Clean implements the exec driver Driver interface.
   362  func (d *Driver) Clean(id string) error {
   363  	return os.RemoveAll(filepath.Join(d.root, id))
   364  }
   365  
   366  // Stats implements the exec driver Driver interface.
   367  func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
   368  	d.Lock()
   369  	c := d.activeContainers[id]
   370  	d.Unlock()
   371  	if c == nil {
   372  		return nil, execdriver.ErrNotRunning
   373  	}
   374  	now := time.Now()
   375  	stats, err := c.Stats()
   376  	if err != nil {
   377  		return nil, err
   378  	}
   379  	memoryLimit := c.Config().Cgroups.Memory
   380  	// if the container does not have any memory limit specified set the
   381  	// limit to the machines memory
   382  	if memoryLimit == 0 {
   383  		memoryLimit = d.machineMemory
   384  	}
   385  	return &execdriver.ResourceStats{
   386  		Stats:       stats,
   387  		Read:        now,
   388  		MemoryLimit: memoryLimit,
   389  	}, nil
   390  }
   391  
   392  // TtyConsole implements the exec driver Terminal interface.
   393  type TtyConsole struct {
   394  	console libcontainer.Console
   395  }
   396  
   397  // NewTtyConsole returns a new TtyConsole struct.
   398  func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
   399  	tty := &TtyConsole{
   400  		console: console,
   401  	}
   402  
   403  	if err := tty.AttachPipes(pipes); err != nil {
   404  		tty.Close()
   405  		return nil, err
   406  	}
   407  
   408  	return tty, nil
   409  }
   410  
   411  // Resize implements Resize method of Terminal interface
   412  func (t *TtyConsole) Resize(h, w int) error {
   413  	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
   414  }
   415  
   416  // AttachPipes attaches given pipes to TtyConsole
   417  func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
   418  	go func() {
   419  		if wb, ok := pipes.Stdout.(interface {
   420  			CloseWriters() error
   421  		}); ok {
   422  			defer wb.CloseWriters()
   423  		}
   424  
   425  		pools.Copy(pipes.Stdout, t.console)
   426  	}()
   427  
   428  	if pipes.Stdin != nil {
   429  		go func() {
   430  			pools.Copy(t.console, pipes.Stdin)
   431  
   432  			pipes.Stdin.Close()
   433  		}()
   434  	}
   435  
   436  	return nil
   437  }
   438  
   439  // Close implements Close method of Terminal interface
   440  func (t *TtyConsole) Close() error {
   441  	return t.console.Close()
   442  }
   443  
   444  func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
   445  
   446  	rootuid, err := container.HostUID()
   447  	if err != nil {
   448  		return err
   449  	}
   450  
   451  	if processConfig.Tty {
   452  		cons, err := p.NewConsole(rootuid)
   453  		if err != nil {
   454  			return err
   455  		}
   456  		term, err := NewTtyConsole(cons, pipes)
   457  		if err != nil {
   458  			return err
   459  		}
   460  		processConfig.Terminal = term
   461  		return nil
   462  	}
   463  	// not a tty--set up stdio pipes
   464  	term := &execdriver.StdConsole{}
   465  	processConfig.Terminal = term
   466  
   467  	// if we are not in a user namespace, there is no reason to go through
   468  	// the hassle of setting up os-level pipes with proper (remapped) ownership
   469  	// so we will do the prior shortcut for non-userns containers
   470  	if rootuid == 0 {
   471  		p.Stdout = pipes.Stdout
   472  		p.Stderr = pipes.Stderr
   473  
   474  		r, w, err := os.Pipe()
   475  		if err != nil {
   476  			return err
   477  		}
   478  		if pipes.Stdin != nil {
   479  			go func() {
   480  				io.Copy(w, pipes.Stdin)
   481  				w.Close()
   482  			}()
   483  			p.Stdin = r
   484  		}
   485  		return nil
   486  	}
   487  
   488  	// if we have user namespaces enabled (rootuid != 0), we will set
   489  	// up os pipes for stderr, stdout, stdin so we can chown them to
   490  	// the proper ownership to allow for proper access to the underlying
   491  	// fds
   492  	var fds []int
   493  
   494  	//setup stdout
   495  	r, w, err := os.Pipe()
   496  	if err != nil {
   497  		return err
   498  	}
   499  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   500  	if pipes.Stdout != nil {
   501  		go io.Copy(pipes.Stdout, r)
   502  	}
   503  	term.Closers = append(term.Closers, r)
   504  	p.Stdout = w
   505  
   506  	//setup stderr
   507  	r, w, err = os.Pipe()
   508  	if err != nil {
   509  		return err
   510  	}
   511  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   512  	if pipes.Stderr != nil {
   513  		go io.Copy(pipes.Stderr, r)
   514  	}
   515  	term.Closers = append(term.Closers, r)
   516  	p.Stderr = w
   517  
   518  	//setup stdin
   519  	r, w, err = os.Pipe()
   520  	if err != nil {
   521  		return err
   522  	}
   523  	fds = append(fds, int(r.Fd()), int(w.Fd()))
   524  	if pipes.Stdin != nil {
   525  		go func() {
   526  			io.Copy(w, pipes.Stdin)
   527  			w.Close()
   528  		}()
   529  		p.Stdin = r
   530  	}
   531  	for _, fd := range fds {
   532  		if err := syscall.Fchown(fd, rootuid, rootuid); err != nil {
   533  			return fmt.Errorf("Failed to chown pipes fd: %v", err)
   534  		}
   535  	}
   536  	return nil
   537  }
   538  
   539  // SupportsHooks implements the execdriver Driver interface.
   540  // The libcontainer/runC-based native execdriver does exploit the hook mechanism
   541  func (d *Driver) SupportsHooks() bool {
   542  	return true
   543  }