github.com/endocode/docker@v1.4.2-0.20160113120958-46eb4700391e/daemon/execdriver/native/create.go (about)

     1  // +build linux,cgo
     2  
     3  package native
     4  
     5  import (
     6  	"fmt"
     7  	"path/filepath"
     8  	"strings"
     9  	"syscall"
    10  
    11  	"github.com/docker/docker/daemon/execdriver"
    12  	derr "github.com/docker/docker/errors"
    13  	"github.com/docker/docker/pkg/mount"
    14  
    15  	"github.com/docker/docker/volume"
    16  	"github.com/opencontainers/runc/libcontainer/apparmor"
    17  	"github.com/opencontainers/runc/libcontainer/configs"
    18  	"github.com/opencontainers/runc/libcontainer/devices"
    19  )
    20  
// createContainer populates and configures the container type with the
// data provided by the execdriver.Command
func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) {
	container = execdriver.InitContainer(c)

	// Configure namespace sharing/joining (IPC, PID, UTS), user-namespace
	// remapping and networking, in that order.
	if err := d.createIpc(container, c); err != nil {
		return nil, err
	}

	if err := d.createPid(container, c); err != nil {
		return nil, err
	}

	if err := d.createUTS(container, c); err != nil {
		return nil, err
	}

	if err := d.setupRemappedRoot(container, c); err != nil {
		return nil, err
	}

	if err := d.createNetwork(container, c, hooks); err != nil {
		return nil, err
	}

	if c.ProcessConfig.Privileged {
		if !container.Readonlyfs {
			// clear readonly for /sys
			for i := range container.Mounts {
				if container.Mounts[i].Destination == "/sys" {
					container.Mounts[i].Flags &= ^syscall.MS_RDONLY
				}
			}
			container.ReadonlyPaths = nil
		}

		// clear readonly for cgroup
		for i := range container.Mounts {
			if container.Mounts[i].Device == "cgroup" {
				container.Mounts[i].Flags &= ^syscall.MS_RDONLY
			}
		}

		// Privileged containers get no masked paths and the full device set
		// via setPrivileged.
		container.MaskPaths = nil
		if err := d.setPrivileged(container); err != nil {
			return nil, err
		}
	} else {
		if err := d.setCapabilities(container, c); err != nil {
			return nil, err
		}

		// Apply the default seccomp profile only when the user supplied
		// none; a user-supplied profile is loaded further below.
		if c.SeccompProfile == "" {
			container.Seccomp = getDefaultSeccompProfile()
		}
	}
	// add CAP_ prefix to all caps for new libcontainer update to match
	// the spec format.
	for i, s := range container.Capabilities {
		if !strings.HasPrefix(s, "CAP_") {
			container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
		}
	}
	container.AdditionalGroups = c.GroupAdd

	if c.AppArmorProfile != "" {
		container.AppArmorProfile = c.AppArmorProfile
	}

	// "unconfined" disables seccomp entirely; any other non-empty value is
	// loaded as a custom profile, overriding the default set above.
	if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" {
		container.Seccomp, err = loadSeccompProfile(c.SeccompProfile)
		if err != nil {
			return nil, err
		}
	}

	if err := execdriver.SetupCgroups(container, c); err != nil {
		return nil, err
	}

	container.OomScoreAdj = c.OomScoreAdj

	if container.Readonlyfs {
		// Mark every mount read-only, leaving /proc, /dev and /dev/pts
		// untouched.
		for i := range container.Mounts {
			switch container.Mounts[i].Destination {
			case "/proc", "/dev", "/dev/pts":
				continue
			}
			container.Mounts[i].Flags |= syscall.MS_RDONLY
		}

		/* These paths must be remounted as r/o */
		container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
	}

	if err := d.setupMounts(container, c); err != nil {
		return nil, err
	}

	d.setupLabels(container, c)
	d.setupRlimits(container, c)
	return container, nil
}
   124  
   125  func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
   126  	if c.Network == nil {
   127  		return nil
   128  	}
   129  	if c.Network.ContainerID != "" {
   130  		d.Lock()
   131  		active := d.activeContainers[c.Network.ContainerID]
   132  		d.Unlock()
   133  
   134  		if active == nil {
   135  			return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
   136  		}
   137  
   138  		state, err := active.State()
   139  		if err != nil {
   140  			return err
   141  		}
   142  
   143  		container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
   144  		return nil
   145  	}
   146  
   147  	if c.Network.NamespacePath != "" {
   148  		container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
   149  		return nil
   150  	}
   151  	// only set up prestart hook if the namespace path is not set (this should be
   152  	// all cases *except* for --net=host shared networking)
   153  	container.Hooks = &configs.Hooks{
   154  		Prestart: []configs.Hook{
   155  			configs.NewFunctionHook(func(s configs.HookState) error {
   156  				if len(hooks.PreStart) > 0 {
   157  					for _, fnHook := range hooks.PreStart {
   158  						// A closed channel for OOM is returned here as it will be
   159  						// non-blocking and return the correct result when read.
   160  						chOOM := make(chan struct{})
   161  						close(chOOM)
   162  						if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
   163  							return err
   164  						}
   165  					}
   166  				}
   167  				return nil
   168  			}),
   169  		},
   170  	}
   171  	return nil
   172  }
   173  
   174  func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
   175  	if c.Ipc.HostIpc {
   176  		container.Namespaces.Remove(configs.NEWIPC)
   177  		return nil
   178  	}
   179  
   180  	if c.Ipc.ContainerID != "" {
   181  		d.Lock()
   182  		active := d.activeContainers[c.Ipc.ContainerID]
   183  		d.Unlock()
   184  
   185  		if active == nil {
   186  			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
   187  		}
   188  
   189  		state, err := active.State()
   190  		if err != nil {
   191  			return err
   192  		}
   193  		container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
   194  	}
   195  
   196  	return nil
   197  }
   198  
   199  func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
   200  	if c.Pid.HostPid {
   201  		container.Namespaces.Remove(configs.NEWPID)
   202  		return nil
   203  	}
   204  
   205  	return nil
   206  }
   207  
   208  func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
   209  	if c.UTS.HostUTS {
   210  		container.Namespaces.Remove(configs.NEWUTS)
   211  		container.Hostname = ""
   212  		return nil
   213  	}
   214  
   215  	return nil
   216  }
   217  
   218  func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
   219  	if c.RemappedRoot.UID == 0 {
   220  		container.Namespaces.Remove(configs.NEWUSER)
   221  		return nil
   222  	}
   223  
   224  	// convert the Docker daemon id map to the libcontainer variant of the same struct
   225  	// this keeps us from having to import libcontainer code across Docker client + daemon packages
   226  	cuidMaps := []configs.IDMap{}
   227  	cgidMaps := []configs.IDMap{}
   228  	for _, idMap := range c.UIDMapping {
   229  		cuidMaps = append(cuidMaps, configs.IDMap(idMap))
   230  	}
   231  	for _, idMap := range c.GIDMapping {
   232  		cgidMaps = append(cgidMaps, configs.IDMap(idMap))
   233  	}
   234  	container.UidMappings = cuidMaps
   235  	container.GidMappings = cgidMaps
   236  
   237  	for _, node := range container.Devices {
   238  		node.Uid = uint32(c.RemappedRoot.UID)
   239  		node.Gid = uint32(c.RemappedRoot.GID)
   240  	}
   241  	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
   242  	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
   243  	for i := range container.Mounts {
   244  		if container.Mounts[i].Device == "cgroup" {
   245  			container.Mounts[i].Flags &= ^syscall.MS_RDONLY
   246  		}
   247  	}
   248  
   249  	return nil
   250  }
   251  
   252  func (d *Driver) setPrivileged(container *configs.Config) (err error) {
   253  	container.Capabilities = execdriver.GetAllCapabilities()
   254  	container.Cgroups.Resources.AllowAllDevices = true
   255  
   256  	hostDevices, err := devices.HostDevices()
   257  	if err != nil {
   258  		return err
   259  	}
   260  	container.Devices = hostDevices
   261  
   262  	if apparmor.IsEnabled() {
   263  		container.AppArmorProfile = "unconfined"
   264  	}
   265  	return nil
   266  }
   267  
   268  func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
   269  	container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
   270  	return err
   271  }
   272  
   273  func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
   274  	if c.Resources == nil {
   275  		return
   276  	}
   277  
   278  	for _, rlimit := range c.Resources.Rlimits {
   279  		container.Rlimits = append(container.Rlimits, configs.Rlimit{
   280  			Type: rlimit.Type,
   281  			Hard: rlimit.Hard,
   282  			Soft: rlimit.Soft,
   283  		})
   284  	}
   285  }
   286  
   287  // If rootfs mount propagation is RPRIVATE, that means all the volumes are
   288  // going to be private anyway. There is no need to apply per volume
   289  // propagation on top. This is just an optimzation so that cost of per volume
   290  // propagation is paid only if user decides to make some volume non-private
   291  // which will force rootfs mount propagation to be non RPRIVATE.
   292  func checkResetVolumePropagation(container *configs.Config) {
   293  	if container.RootPropagation != mount.RPRIVATE {
   294  		return
   295  	}
   296  	for _, m := range container.Mounts {
   297  		m.PropagationFlags = nil
   298  	}
   299  }
   300  
   301  func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
   302  	for _, m := range mountinfo {
   303  		if m.Mountpoint == dir {
   304  			return m
   305  		}
   306  	}
   307  	return nil
   308  }
   309  
   310  // Get the source mount point of directory passed in as argument. Also return
   311  // optional fields.
   312  func getSourceMount(source string) (string, string, error) {
   313  	// Ensure any symlinks are resolved.
   314  	sourcePath, err := filepath.EvalSymlinks(source)
   315  	if err != nil {
   316  		return "", "", err
   317  	}
   318  
   319  	mountinfos, err := mount.GetMounts()
   320  	if err != nil {
   321  		return "", "", err
   322  	}
   323  
   324  	mountinfo := getMountInfo(mountinfos, sourcePath)
   325  	if mountinfo != nil {
   326  		return sourcePath, mountinfo.Optional, nil
   327  	}
   328  
   329  	path := sourcePath
   330  	for {
   331  		path = filepath.Dir(path)
   332  
   333  		mountinfo = getMountInfo(mountinfos, path)
   334  		if mountinfo != nil {
   335  			return path, mountinfo.Optional, nil
   336  		}
   337  
   338  		if path == "/" {
   339  			break
   340  		}
   341  	}
   342  
   343  	// If we are here, we did not find parent mount. Something is wrong.
   344  	return "", "", fmt.Errorf("Could not find source mount of %s", source)
   345  }
   346  
   347  // Ensure mount point on which path is mouted, is shared.
   348  func ensureShared(path string) error {
   349  	sharedMount := false
   350  
   351  	sourceMount, optionalOpts, err := getSourceMount(path)
   352  	if err != nil {
   353  		return err
   354  	}
   355  	// Make sure source mount point is shared.
   356  	optsSplit := strings.Split(optionalOpts, " ")
   357  	for _, opt := range optsSplit {
   358  		if strings.HasPrefix(opt, "shared:") {
   359  			sharedMount = true
   360  			break
   361  		}
   362  	}
   363  
   364  	if !sharedMount {
   365  		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
   366  	}
   367  	return nil
   368  }
   369  
   370  // Ensure mount point on which path is mounted, is either shared or slave.
   371  func ensureSharedOrSlave(path string) error {
   372  	sharedMount := false
   373  	slaveMount := false
   374  
   375  	sourceMount, optionalOpts, err := getSourceMount(path)
   376  	if err != nil {
   377  		return err
   378  	}
   379  	// Make sure source mount point is shared.
   380  	optsSplit := strings.Split(optionalOpts, " ")
   381  	for _, opt := range optsSplit {
   382  		if strings.HasPrefix(opt, "shared:") {
   383  			sharedMount = true
   384  			break
   385  		} else if strings.HasPrefix(opt, "master:") {
   386  			slaveMount = true
   387  			break
   388  		}
   389  	}
   390  
   391  	if !sharedMount && !slaveMount {
   392  		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
   393  	}
   394  	return nil
   395  }
   396  
   397  func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
   398  	userMounts := make(map[string]struct{})
   399  	for _, m := range c.Mounts {
   400  		userMounts[m.Destination] = struct{}{}
   401  	}
   402  
   403  	// Filter out mounts that are overridden by user supplied mounts
   404  	var defaultMounts []*configs.Mount
   405  	_, mountDev := userMounts["/dev"]
   406  	for _, m := range container.Mounts {
   407  		if _, ok := userMounts[m.Destination]; !ok {
   408  			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
   409  				container.Devices = nil
   410  				continue
   411  			}
   412  			defaultMounts = append(defaultMounts, m)
   413  		}
   414  	}
   415  	container.Mounts = defaultMounts
   416  
   417  	mountPropagationMap := map[string]int{
   418  		"private":  mount.PRIVATE,
   419  		"rprivate": mount.RPRIVATE,
   420  		"shared":   mount.SHARED,
   421  		"rshared":  mount.RSHARED,
   422  		"slave":    mount.SLAVE,
   423  		"rslave":   mount.RSLAVE,
   424  	}
   425  
   426  	for _, m := range c.Mounts {
   427  		for _, cm := range container.Mounts {
   428  			if cm.Destination == m.Destination {
   429  				return derr.ErrorCodeMountDup.WithArgs(m.Destination)
   430  			}
   431  		}
   432  
   433  		if m.Source == "tmpfs" {
   434  			var (
   435  				data  = "size=65536k"
   436  				flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
   437  				err   error
   438  			)
   439  			fulldest := filepath.Join(c.Rootfs, m.Destination)
   440  			if m.Data != "" {
   441  				flags, data, err = mount.ParseTmpfsOptions(m.Data)
   442  				if err != nil {
   443  					return err
   444  				}
   445  			}
   446  			container.Mounts = append(container.Mounts, &configs.Mount{
   447  				Source:           m.Source,
   448  				Destination:      m.Destination,
   449  				Data:             data,
   450  				Device:           "tmpfs",
   451  				Flags:            flags,
   452  				PremountCmds:     genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
   453  				PostmountCmds:    genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
   454  				PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
   455  			})
   456  			continue
   457  		}
   458  		flags := syscall.MS_BIND | syscall.MS_REC
   459  		var pFlag int
   460  		if !m.Writable {
   461  			flags |= syscall.MS_RDONLY
   462  		}
   463  
   464  		// Determine property of RootPropagation based on volume
   465  		// properties. If a volume is shared, then keep root propagtion
   466  		// shared. This should work for slave and private volumes too.
   467  		//
   468  		// For slave volumes, it can be either [r]shared/[r]slave.
   469  		//
   470  		// For private volumes any root propagation value should work.
   471  
   472  		pFlag = mountPropagationMap[m.Propagation]
   473  		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
   474  			if err := ensureShared(m.Source); err != nil {
   475  				return err
   476  			}
   477  			rootpg := container.RootPropagation
   478  			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
   479  				execdriver.SetRootPropagation(container, mount.SHARED)
   480  			}
   481  		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
   482  			if err := ensureSharedOrSlave(m.Source); err != nil {
   483  				return err
   484  			}
   485  			rootpg := container.RootPropagation
   486  			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
   487  				execdriver.SetRootPropagation(container, mount.RSLAVE)
   488  			}
   489  		}
   490  
   491  		mount := &configs.Mount{
   492  			Source:      m.Source,
   493  			Destination: m.Destination,
   494  			Device:      "bind",
   495  			Flags:       flags,
   496  		}
   497  
   498  		if pFlag != 0 {
   499  			mount.PropagationFlags = []int{pFlag}
   500  		}
   501  
   502  		container.Mounts = append(container.Mounts, mount)
   503  	}
   504  
   505  	checkResetVolumePropagation(container)
   506  	return nil
   507  }
   508  
   509  func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
   510  	container.ProcessLabel = c.ProcessLabel
   511  	container.MountLabel = c.MountLabel
   512  }