github.com/walkingsparrow/docker@v1.4.2-0.20151218153551-b708a2249bfa/daemon/execdriver/native/create.go

// +build linux,cgo

package native

import (
	"fmt"
	"path/filepath"
	"strings"
	"syscall"

	"github.com/docker/docker/daemon/execdriver"
	derr "github.com/docker/docker/errors"
	"github.com/docker/docker/pkg/mount"

	"github.com/docker/docker/volume"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/devices"
)

// createContainer populates and configures the container type with the
// data provided by the execdriver.Command
func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) {
	container = execdriver.InitContainer(c)

	if err := d.createIpc(container, c); err != nil {
		return nil, err
	}

	if err := d.createPid(container, c); err != nil {
		return nil, err
	}

	if err := d.createUTS(container, c); err != nil {
		return nil, err
	}

	if err := d.setupRemappedRoot(container, c); err != nil {
		return nil, err
	}

	if err := d.createNetwork(container, c, hooks); err != nil {
		return nil, err
	}

	if c.ProcessConfig.Privileged {
		if !container.Readonlyfs {
			// clear readonly for /sys
			for i := range container.Mounts {
				if container.Mounts[i].Destination == "/sys" {
					container.Mounts[i].Flags &= ^syscall.MS_RDONLY
				}
			}
			container.ReadonlyPaths = nil
		}

		// clear readonly for cgroup
		for i := range container.Mounts {
			if container.Mounts[i].Device == "cgroup" {
				container.Mounts[i].Flags &= ^syscall.MS_RDONLY
			}
		}

		container.MaskPaths = nil
		if err := d.setPrivileged(container); err != nil {
			return nil, err
		}
	} else {
		if err := d.setCapabilities(container, c); err != nil {
			return nil, err
		}
	}
	// add the CAP_ prefix to all capabilities so they match the spec
	// format expected by the updated libcontainer.
	for i, s := range container.Capabilities {
		if !strings.HasPrefix(s, "CAP_") {
			container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
		}
	}
	container.AdditionalGroups = c.GroupAdd

	if c.AppArmorProfile != "" {
		container.AppArmorProfile = c.AppArmorProfile
	}

	if c.SeccompProfile != "" {
		container.Seccomp, err = loadSeccompProfile(c.SeccompProfile)
		if err != nil {
			return nil, err
		}
	}
	if err := execdriver.SetupCgroups(container, c); err != nil {
		return nil, err
	}

	container.OomScoreAdj = c.OomScoreAdj

	if container.Readonlyfs {
		for i := range container.Mounts {
			switch container.Mounts[i].Destination {
			case "/proc", "/dev", "/dev/pts":
				continue
			}
			container.Mounts[i].Flags |= syscall.MS_RDONLY
		}

		/* These paths must be remounted as r/o */
		container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
	}

	if err := d.setupMounts(container, c); err != nil {
		return nil, err
	}

	d.setupLabels(container, c)
	d.setupRlimits(container, c)
	return container, nil
}

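// Aside (illustrative sketch, not part of the original source): a caller in
// this driver would typically build the config above and hand it to
// libcontainer; the field name d.factory below is an assumption, roughly:
//
//	cfg, err := d.createContainer(cmd, hooks) // cmd is an *execdriver.Command
//	if err != nil {
//		return err
//	}
//	cont, err := d.factory.Create(cmd.ID, cfg) // assuming d.factory is a libcontainer.Factory
//	if err != nil {
//		return err
//	}
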
func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
	if c.Network == nil {
		return nil
	}
	if c.Network.ContainerID != "" {
		d.Lock()
		active := d.activeContainers[c.Network.ContainerID]
		d.Unlock()

		if active == nil {
			return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
		}

		state, err := active.State()
		if err != nil {
			return err
		}

		container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
		return nil
	}

	if c.Network.NamespacePath != "" {
		container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
		return nil
	}
	// only set up prestart hook if the namespace path is not set (this should be
	// all cases *except* for --net=host shared networking)
	container.Hooks = &configs.Hooks{
		Prestart: []configs.Hook{
			configs.NewFunctionHook(func(s configs.HookState) error {
				if len(hooks.PreStart) > 0 {
					for _, fnHook := range hooks.PreStart {
						// A closed channel for OOM is returned here as it will be
						// non-blocking and return the correct result when read.
						chOOM := make(chan struct{})
						close(chOOM)
						if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
							return err
						}
					}
				}
				return nil
			}),
		},
	}
	return nil
}

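// Aside (illustrative, not from the original source): a receive from a closed
// channel never blocks, which is why the closed chOOM above satisfies the
// hook's OOM-channel parameter without wiring up a real OOM notifier:
//
//	ch := make(chan struct{})
//	close(ch)
//	_, ok := <-ch // returns immediately with ok == false
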
func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
	if c.Ipc.HostIpc {
		container.Namespaces.Remove(configs.NEWIPC)
		return nil
	}

	if c.Ipc.ContainerID != "" {
		d.Lock()
		active := d.activeContainers[c.Ipc.ContainerID]
		d.Unlock()

		if active == nil {
			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
		}

		state, err := active.State()
		if err != nil {
			return err
		}
		container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
	}

	return nil
}

func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
	if c.Pid.HostPid {
		container.Namespaces.Remove(configs.NEWPID)
		return nil
	}

	return nil
}

func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
	if c.UTS.HostUTS {
		container.Namespaces.Remove(configs.NEWUTS)
		container.Hostname = ""
		return nil
	}

	return nil
}

func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
	if c.RemappedRoot.UID == 0 {
		container.Namespaces.Remove(configs.NEWUSER)
		return nil
	}

	// convert the Docker daemon id map to the libcontainer variant of the same struct
	// this keeps us from having to import libcontainer code across Docker client + daemon packages
	cuidMaps := []configs.IDMap{}
	cgidMaps := []configs.IDMap{}
	for _, idMap := range c.UIDMapping {
		cuidMaps = append(cuidMaps, configs.IDMap(idMap))
	}
	for _, idMap := range c.GIDMapping {
		cgidMaps = append(cgidMaps, configs.IDMap(idMap))
	}
	container.UidMappings = cuidMaps
	container.GidMappings = cgidMaps

	for _, node := range container.Devices {
		node.Uid = uint32(c.RemappedRoot.UID)
		node.Gid = uint32(c.RemappedRoot.GID)
	}
	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	for i := range container.Mounts {
		if container.Mounts[i].Device == "cgroup" {
			container.Mounts[i].Flags &= ^syscall.MS_RDONLY
		}
	}

	return nil
}

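// Aside (illustrative sketch, not part of the original source): the
// configs.IDMap(idMap) conversions above are only possible because the
// daemon-side ID-mapping struct and configs.IDMap share the same field
// layout, roughly:
//
//	type IDMap struct {
//		ContainerID int
//		HostID      int
//		Size        int
//	}
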
func (d *Driver) setPrivileged(container *configs.Config) (err error) {
	container.Capabilities = execdriver.GetAllCapabilities()
	container.Cgroups.AllowAllDevices = true

	hostDevices, err := devices.HostDevices()
	if err != nil {
		return err
	}
	container.Devices = hostDevices

	if apparmor.IsEnabled() {
		container.AppArmorProfile = "unconfined"
	}
	return nil
}

func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
	container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
	return err
}

func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
	if c.Resources == nil {
		return
	}

	for _, rlimit := range c.Resources.Rlimits {
		container.Rlimits = append(container.Rlimits, configs.Rlimit{
			Type: rlimit.Type,
			Hard: rlimit.Hard,
			Soft: rlimit.Soft,
		})
	}
}

// If rootfs mount propagation is RPRIVATE, all the volumes are going to be
// private anyway, so there is no need to apply per-volume propagation on top.
// This is just an optimization: the cost of per-volume propagation is paid
// only if the user makes some volume non-private, which forces rootfs mount
// propagation to be non-RPRIVATE.
func checkResetVolumePropagation(container *configs.Config) {
	if container.RootPropagation != mount.RPRIVATE {
		return
	}
	for _, m := range container.Mounts {
		m.PropagationFlags = nil
	}
}

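// Aside (illustrative, not from the original source): with an rprivate root,
// per-mount propagation flags are redundant, so resetting them is safe:
//
//	execdriver.SetRootPropagation(container, mount.RPRIVATE)
//	checkResetVolumePropagation(container) // clears PropagationFlags on every mount
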
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
	for _, m := range mountinfo {
		if m.Mountpoint == dir {
			return m
		}
	}
	return nil
}

// Get the source mount point of the directory passed in as an argument.
// Also return its optional fields.
func getSourceMount(source string) (string, string, error) {
	// Ensure any symlinks are resolved.
	sourcePath, err := filepath.EvalSymlinks(source)
	if err != nil {
		return "", "", err
	}

	mountinfos, err := mount.GetMounts()
	if err != nil {
		return "", "", err
	}

	mountinfo := getMountInfo(mountinfos, sourcePath)
	if mountinfo != nil {
		return sourcePath, mountinfo.Optional, nil
	}

	path := sourcePath
	for {
		path = filepath.Dir(path)

		mountinfo = getMountInfo(mountinfos, path)
		if mountinfo != nil {
			return path, mountinfo.Optional, nil
		}

		if path == "/" {
			break
		}
	}

	// If we got here, we did not find the parent mount. Something is wrong.
	return "", "", fmt.Errorf("Could not find source mount of %s", source)
}

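// Aside (illustrative, not from the original source): the "optional fields"
// returned above come from /proc/self/mountinfo; a shared mount carries a
// field such as "shared:42" and a slave mount "master:42", for example:
//
//	36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
//
// where "master:1" is the optional field that ensureSharedOrSlave looks for.
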
// Ensure that the mount point on which path is mounted is shared.
func ensureShared(path string) error {
	sharedMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure the source mount point is shared.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		}
	}

	if !sharedMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
	}
	return nil
}

// Ensure that the mount point on which path is mounted is either shared or slave.
func ensureSharedOrSlave(path string) error {
	sharedMount := false
	slaveMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure the source mount point is either shared or slave.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		} else if strings.HasPrefix(opt, "master:") {
			slaveMount = true
			break
		}
	}

	if !sharedMount && !slaveMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
	}
	return nil
}

func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
	userMounts := make(map[string]struct{})
	for _, m := range c.Mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Filter out mounts that are overridden by user supplied mounts
	var defaultMounts []*configs.Mount
	_, mountDev := userMounts["/dev"]
	for _, m := range container.Mounts {
		if _, ok := userMounts[m.Destination]; !ok {
			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
				container.Devices = nil
				continue
			}
			defaultMounts = append(defaultMounts, m)
		}
	}
	container.Mounts = defaultMounts

	mountPropagationMap := map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	for _, m := range c.Mounts {
		for _, cm := range container.Mounts {
			if cm.Destination == m.Destination {
				return derr.ErrorCodeMountDup.WithArgs(m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			var (
				data  = "size=65536k"
				flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
				err   error
			)
			fulldest := filepath.Join(c.Rootfs, m.Destination)
			if m.Data != "" {
				flags, data, err = mount.ParseTmpfsOptions(m.Data)
				if err != nil {
					return err
				}
			}
			container.Mounts = append(container.Mounts, &configs.Mount{
				Source:           m.Source,
				Destination:      m.Destination,
				Data:             data,
				Device:           "tmpfs",
				Flags:            flags,
				PremountCmds:     genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
				PostmountCmds:    genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
				PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
			})
			continue
		}
		flags := syscall.MS_BIND | syscall.MS_REC
		var pFlag int
		if !m.Writable {
			flags |= syscall.MS_RDONLY
		}

		// Determine the RootPropagation property based on the volume's
		// propagation mode. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, root propagation can be either [r]shared or [r]slave.
		//
		// For private volumes any root propagation value should work.

		pFlag = mountPropagationMap[m.Propagation]
		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := container.RootPropagation
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				execdriver.SetRootPropagation(container, mount.SHARED)
			}
		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
			if err := ensureSharedOrSlave(m.Source); err != nil {
				return err
			}
			rootpg := container.RootPropagation
			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
				execdriver.SetRootPropagation(container, mount.RSLAVE)
			}
		}

		mount := &configs.Mount{
			Source:      m.Source,
			Destination: m.Destination,
			Device:      "bind",
			Flags:       flags,
		}

		if pFlag != 0 {
			mount.PropagationFlags = []int{pFlag}
		}

		container.Mounts = append(container.Mounts, mount)
	}

	checkResetVolumePropagation(container)
	return nil
}

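// Aside (illustrative sketch, not from the original source): mount.ParseTmpfsOptions,
// used in setupMounts above, splits a tmpfs option string into mount flags and
// the remaining data string, roughly:
//
//	flags, data, err := mount.ParseTmpfsOptions("size=128m,noexec")
//	// on success, flags would include syscall.MS_NOEXEC and data would be "size=128m"
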
func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
	container.ProcessLabel = c.ProcessLabel
	container.MountLabel = c.MountLabel
}