github.com/flavio/docker@v0.1.3-0.20170117145210-f63d1a6eec47/daemon/oci_linux.go

package daemon

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"github.com/Sirupsen/logrus"
	containertypes "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/container"
	"github.com/docker/docker/daemon/caps"
	"github.com/docker/docker/oci"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/docker/pkg/stringutils"
	"github.com/docker/docker/pkg/symlink"
	"github.com/docker/docker/volume"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/devices"
	"github.com/opencontainers/runc/libcontainer/user"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

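// setResources translates the container's resource limits (block IO
// weights and throttles, memory, CPU, pids limit, OOM-kill behaviour)
// into the Linux resources section of the runtime spec, carrying over any
// device cgroup rules already present on the spec.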
func setResources(s *specs.Spec, r containertypes.Resources) error {
	weightDevices, err := getBlkioWeightDevices(r)
	if err != nil {
		return err
	}
	readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
	if err != nil {
		return err
	}
	writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
	if err != nil {
		return err
	}
	readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
	if err != nil {
		return err
	}
	writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
	if err != nil {
		return err
	}

	memoryRes := getMemoryResources(r)
	cpuRes := getCPUResources(r)
	blkioWeight := r.BlkioWeight

	specResources := &specs.Resources{
		Memory: memoryRes,
		CPU:    cpuRes,
		BlockIO: &specs.BlockIO{
			Weight:                  &blkioWeight,
			WeightDevice:            weightDevices,
			ThrottleReadBpsDevice:   readBpsDevice,
			ThrottleWriteBpsDevice:  writeBpsDevice,
			ThrottleReadIOPSDevice:  readIOpsDevice,
			ThrottleWriteIOPSDevice: writeIOpsDevice,
		},
		DisableOOMKiller: r.OomKillDisable,
		Pids: &specs.Pids{
			Limit: &r.PidsLimit,
		},
	}

	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
		specResources.Devices = s.Linux.Resources.Devices
	}

	s.Linux.Resources = specResources
	return nil
}

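// setDevices populates the spec's device list and device cgroup rules. A
// privileged container is given every host device with "rwm" access; an
// unprivileged one gets only the devices explicitly mapped in its
// HostConfig.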
func setDevices(s *specs.Spec, c *container.Container) error {
	// Build lists of devices allowed and created within the container.
	var devs []specs.Device
	devPermissions := s.Linux.Resources.Devices
	if c.HostConfig.Privileged {
		hostDevices, err := devices.HostDevices()
		if err != nil {
			return err
		}
		for _, d := range hostDevices {
			devs = append(devs, oci.Device(d))
		}
		rwm := "rwm"
		devPermissions = []specs.DeviceCgroup{
			{
				Allow:  true,
				Access: &rwm,
			},
		}
	} else {
		for _, deviceMapping := range c.HostConfig.Devices {
			d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
			if err != nil {
				return err
			}
			devs = append(devs, d...)
			devPermissions = append(devPermissions, dPermissions...)
		}
	}

	s.Linux.Devices = append(s.Linux.Devices, devs...)
	s.Linux.Resources.Devices = devPermissions
	return nil
}

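// setRlimits converts the container's ulimits, merged with the daemon
// defaults, into RLIMIT_* entries on the spec's process.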
func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	var rlimits []specs.Rlimit

	// We want to leave the original HostConfig alone, so make a copy here
	hostConfig := *c.HostConfig
	// Merge with the daemon defaults
	daemon.mergeUlimits(&hostConfig)
	for _, ul := range hostConfig.Ulimits {
		rlimits = append(rlimits, specs.Rlimit{
			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
			Soft: uint64(ul.Soft),
			Hard: uint64(ul.Hard),
		})
	}

	s.Process.Rlimits = rlimits
	return nil
}

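// setUser resolves the container's configured user to numeric IDs and
// sets them on the spec's process.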
func setUser(s *specs.Spec, c *container.Container) error {
	uid, gid, additionalGids, err := getUser(c, c.Config.User)
	if err != nil {
		return err
	}
	s.Process.User.UID = uid
	s.Process.User.GID = gid
	s.Process.User.AdditionalGids = additionalGids
	return nil
}

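// readUserFile opens the file at path p inside the container's root
// filesystem, following symlinks only within that scope.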
func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
	if err != nil {
		return nil, err
	}
	return os.Open(fp)
}

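// getUser resolves username against the passwd and group files inside the
// container and returns the numeric uid, gid, and additional gids,
// including any groups requested through HostConfig.GroupAdd.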
func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return 0, 0, nil, err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return 0, 0, nil, err
	}
	passwdFile, err := readUserFile(c, passwdPath)
	if err == nil {
		defer passwdFile.Close()
	}
	groupFile, err := readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}

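	// The open errors above are deliberately ignored: an image may not
	// ship passwd or group files at all, and GetExecUser can still
	// resolve purely numeric user specifications when the readers are nil.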
	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
	if err != nil {
		return 0, 0, nil, err
	}

	// TODO: fix this double read by a change to libcontainer/user pkg
	groupFile, err = readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}
	var addGroups []int
	if len(c.HostConfig.GroupAdd) > 0 {
		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
		if err != nil {
			return 0, 0, nil, err
		}
	}
	uid := uint32(execUser.Uid)
	gid := uint32(execUser.Gid)
	sgids := append(execUser.Sgids, addGroups...)
	var additionalGids []uint32
	for _, g := range sgids {
		additionalGids = append(additionalGids, uint32(g))
	}
	return uid, gid, additionalGids, nil
}

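// setNamespace adds ns to the spec, replacing any existing namespace of
// the same type, so each namespace type appears at most once. For example,
// setNamespace(s, specs.Namespace{Type: "pid", Path: "/proc/123/ns/pid"})
// overwrites a previously added plain "pid" namespace.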
func setNamespace(s *specs.Spec, ns specs.Namespace) {
	for i, n := range s.Linux.Namespaces {
		if n.Type == ns.Type {
			s.Linux.Namespaces[i] = ns
			return
		}
	}
	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
}

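// setCapabilities computes the container's capability set: everything for
// privileged containers, otherwise the spec's default capabilities
// adjusted by CapAdd and CapDrop.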
func setCapabilities(s *specs.Spec, c *container.Container) error {
	var caplist []string
	var err error
	if c.HostConfig.Privileged {
		caplist = caps.GetAllCapabilities()
	} else {
		caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
		if err != nil {
			return err
		}
	}
	s.Process.Capabilities = caplist
	return nil
}

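// setNamespaces configures the user, network, ipc, pid, and uts
// namespaces on the spec according to the container's HostConfig. Where a
// mode points at another container, that container's namespace path is
// joined; where it points at the host, the namespace is removed from the
// spec. When user namespaces are active, joining another container's
// network, ipc, or pid namespace also requires joining its user namespace.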
func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	userNS := false
	// user
	if c.HostConfig.UsernsMode.IsPrivate() {
		uidMap, gidMap := daemon.GetUIDGIDMaps()
		if uidMap != nil {
			userNS = true
			ns := specs.Namespace{Type: "user"}
			setNamespace(s, ns)
			s.Linux.UIDMappings = specMapping(uidMap)
			s.Linux.GIDMappings = specMapping(gidMap)
		}
	}
	// network
	if !c.Config.NetworkDisabled {
		ns := specs.Namespace{Type: "network"}
		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
		if parts[0] == "container" {
			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
			if err != nil {
				return err
			}
			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
			if userNS {
				// to share a net namespace, they must also share a user namespace
				nsUser := specs.Namespace{Type: "user"}
				nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
				setNamespace(s, nsUser)
			}
		} else if c.HostConfig.NetworkMode.IsHost() {
			ns.Path = c.NetworkSettings.SandboxKey
		}
		setNamespace(s, ns)
	}
	// ipc
	if c.HostConfig.IpcMode.IsContainer() {
		ns := specs.Namespace{Type: "ipc"}
		ic, err := daemon.getIpcContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share an IPC namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.IpcMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("ipc"))
	} else {
		ns := specs.Namespace{Type: "ipc"}
		setNamespace(s, ns)
	}
	// pid
	if c.HostConfig.PidMode.IsContainer() {
		ns := specs.Namespace{Type: "pid"}
		pc, err := daemon.getPidContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share a PID namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.PidMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("pid"))
	} else {
		ns := specs.Namespace{Type: "pid"}
		setNamespace(s, ns)
	}
	// uts
	if c.HostConfig.UTSMode.IsHost() {
		oci.RemoveNamespace(s, specs.NamespaceType("uts"))
		s.Hostname = ""
	}

	return nil
}

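// specMapping converts the daemon's idtools ID mappings into the
// runtime-spec representation.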
func specMapping(s []idtools.IDMap) []specs.IDMapping {
	var ids []specs.IDMapping
	for _, item := range s {
		ids = append(ids, specs.IDMapping{
			HostID:      uint32(item.HostID),
			ContainerID: uint32(item.ContainerID),
			Size:        uint32(item.Size),
		})
	}
	return ids
}

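// getMountInfo returns the mount entry whose mount point is exactly dir,
// or nil if there is none.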
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
	for _, m := range mountinfo {
		if m.Mountpoint == dir {
			return m
		}
	}
	return nil
}

// getSourceMount returns the source mount point of the directory passed
// in as an argument, along with that mount's optional fields. It walks up
// the directory tree until it finds an entry in the mount table.
func getSourceMount(source string) (string, string, error) {
	// Ensure any symlinks are resolved.
	sourcePath, err := filepath.EvalSymlinks(source)
	if err != nil {
		return "", "", err
	}

	mountinfos, err := mount.GetMounts()
	if err != nil {
		return "", "", err
	}

	mountinfo := getMountInfo(mountinfos, sourcePath)
	if mountinfo != nil {
		return sourcePath, mountinfo.Optional, nil
	}

	path := sourcePath
	for {
		path = filepath.Dir(path)

		mountinfo = getMountInfo(mountinfos, path)
		if mountinfo != nil {
			return path, mountinfo.Optional, nil
		}

		if path == "/" {
			break
		}
	}

	// If we are here, we did not find parent mount. Something is wrong.
	return "", "", fmt.Errorf("could not find source mount of %s", source)
}

// ensureShared ensures that the mount point on which path is mounted is
// shared.
func ensureShared(path string) error {
	sharedMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is shared.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		}
	}

	if !sharedMount {
		return fmt.Errorf("path %s is mounted on %s but it is not a shared mount", path, sourceMount)
	}
	return nil
}

// ensureSharedOrSlave ensures that the mount point on which path is
// mounted is either shared or slave.
func ensureSharedOrSlave(path string) error {
	sharedMount := false
	slaveMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure source mount point is shared or slave.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		} else if strings.HasPrefix(opt, "master:") {
			slaveMount = true
			break
		}
	}

	if !sharedMount && !slaveMount {
		return fmt.Errorf("path %s is mounted on %s but it is not a shared or slave mount", path, sourceMount)
	}
	return nil
}

var (
	mountPropagationMap = map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	mountPropagationReverseMap = map[int]string{
		mount.PRIVATE:  "private",
		mount.RPRIVATE: "rprivate",
		mount.SHARED:   "shared",
		mount.RSHARED:  "rshared",
		mount.SLAVE:    "slave",
		mount.RSLAVE:   "rslave",
	}
)

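// setMounts merges the user-supplied mounts into the spec's default
// mounts: defaults shadowed by user mounts are dropped, tmpfs and bind
// mounts are translated into spec entries with the appropriate propagation
// options, and read-only and cgroup mounts are adjusted for read-only,
// privileged, and user-namespaced containers.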
func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
	userMounts := make(map[string]struct{})
	for _, m := range mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Filter out mounts that are overridden by user-supplied mounts
	var defaultMounts []specs.Mount
	_, mountDev := userMounts["/dev"]
	for _, m := range s.Mounts {
		if _, ok := userMounts[m.Destination]; !ok {
			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
				continue
			}
			defaultMounts = append(defaultMounts, m)
		}
	}

	s.Mounts = defaultMounts
	for _, m := range mounts {
		for _, cm := range s.Mounts {
			if cm.Destination == m.Destination {
				return fmt.Errorf("duplicate mount point '%s'", m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			data := m.Data
			options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)}
			if data != "" {
				options = append(options, strings.Split(data, ",")...)
			}

			merged, err := mount.MergeTmpfsOptions(options)
			if err != nil {
				return err
			}

			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
			continue
		}

		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}

		// Determine property of RootPropagation based on volume
		// properties. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, it can be either [r]shared/[r]slave.
		//
		// For private volumes any root propagation value should work.
		pFlag := mountPropagationMap[m.Propagation]
		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
			}
		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
			if err := ensureSharedOrSlave(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
			}
		}

		opts := []string{"rbind"}
		if !m.Writable {
			opts = append(opts, "ro")
		}
		if pFlag != 0 {
			opts = append(opts, mountPropagationReverseMap[pFlag])
		}

		mt.Options = opts
		s.Mounts = append(s.Mounts, mt)
	}

	if s.Root.Readonly {
		for i, m := range s.Mounts {
			switch m.Destination {
			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
				continue
			}
			if _, ok := userMounts[m.Destination]; !ok {
				if !stringutils.InSlice(m.Options, "ro") {
					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
				}
			}
		}
	}

	if c.HostConfig.Privileged {
		if !s.Root.Readonly {
			// clear readonly for /sys
			for i := range s.Mounts {
				if s.Mounts[i].Destination == "/sys" {
					clearReadOnly(&s.Mounts[i])
				}
			}
		}
		s.Linux.ReadonlyPaths = nil
		s.Linux.MaskedPaths = nil
	}

	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
		for i, m := range s.Mounts {
			if m.Type == "cgroup" {
				clearReadOnly(&s.Mounts[i])
			}
		}
	}

	return nil
}

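// populateCommonSpec fills in the parts of the spec shared by all Linux
// containers: the root filesystem, process arguments, working directory,
// environment, terminal settings, hostname, and the optional /dev/init
// bind mount for containers running in a private pid namespace.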
func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
	linkedEnv, err := daemon.setupLinkedContainers(c)
	if err != nil {
		return err
	}
	s.Root = specs.Root{
		Path:     c.BaseFS,
		Readonly: c.HostConfig.ReadonlyRootfs,
	}
	rootUID, rootGID := daemon.GetRemappedUIDGID()
	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
		return err
	}
	cwd := c.Config.WorkingDir
	if len(cwd) == 0 {
		cwd = "/"
	}
	s.Process.Args = append([]string{c.Path}, c.Args...)

	// Only add the custom init if it is specified and the container is
	// running in its own private pid namespace. It does not make sense to
	// add one when the container shares the host's pid namespace or
	// another container's, where an init is already present.
	if c.HostConfig.PidMode.IsPrivate() {
		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
			(c.HostConfig.Init == nil && daemon.configStore.Init) {
			s.Process.Args = append([]string{"/dev/init", c.Path}, c.Args...)
			var path string
			if daemon.configStore.InitPath == "" && c.HostConfig.InitPath == "" {
				path, err = exec.LookPath(DefaultInitBinary)
				if err != nil {
					return err
				}
			}
			if daemon.configStore.InitPath != "" {
				path = daemon.configStore.InitPath
			}
			if c.HostConfig.InitPath != "" {
				path = c.HostConfig.InitPath
			}
			s.Mounts = append(s.Mounts, specs.Mount{
				Destination: "/dev/init",
				Type:        "bind",
				Source:      path,
				Options:     []string{"bind", "ro"},
			})
		}
	}
	s.Process.Cwd = cwd
	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
	s.Process.Terminal = c.Config.Tty
	s.Hostname = c.FullHostname()

	return nil
}

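// createSpec builds the complete OCI runtime spec for the container,
// starting from the daemon's default spec and layering on the cgroups
// path, resources, devices, rlimits, user, namespaces, capabilities,
// seccomp, mounts, the libnetwork prestart hook, and the AppArmor and
// SELinux process labels.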
func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
	s := oci.DefaultSpec()
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}

	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}

	if useSystemd {
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = &cgroupsPath

	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	s.Linux.Sysctl = c.HostConfig.Sysctls

	p := *s.Linux.CgroupsPath
	if useSystemd {
		initPath, err := cgroups.GetInitCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		p, err = cgroups.GetThisCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		p = filepath.Join(initPath, p)
	}

	// Clean path to guard against things like ../../../BAD
	parentPath := filepath.Dir(p)
	if !filepath.IsAbs(parentPath) {
		parentPath = filepath.Clean("/" + parentPath)
	}

	if err := daemon.initCgroupsPath(parentPath); err != nil {
		return nil, fmt.Errorf("linux init cgroups path: %v", err)
	}
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := setRlimits(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	if err := daemon.setupSecretDir(c); err != nil {
		return nil, err
	}

	ms, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}

	ms = append(ms, c.IpcMounts()...)

	tmpfsMounts, err := c.TmpfsMounts()
	if err != nil {
		return nil, err
	}
	ms = append(ms, tmpfsMounts...)

	if m := c.SecretMount(); m != nil {
		ms = append(ms, *m)
	}

	sort.Sort(mounts(ms))
	if err := setMounts(daemon, &s, c, ms); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
			if err != nil {
				return nil, err
			}

			s.Hooks = specs.Hooks{
				Prestart: []specs.Hook{{
					Path: target, // FIXME: cross-platform
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	if apparmor.IsEnabled() {
		var appArmorProfile string
		if c.AppArmorProfile != "" {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		} else {
			appArmorProfile = "docker-default"
		}

		if appArmorProfile == "docker-default" {
			// Unattended upgrades and other fun services can unload AppArmor
			// profiles inadvertently. Since we cannot store our profile in
			// /etc/apparmor.d, nor can we practically add other ways of
			// telling the system to keep our profile loaded, in order to make
			// sure that we keep the default profile enabled we dynamically
			// reload it if necessary.
			if err := ensureDefaultAppArmorProfile(); err != nil {
				return nil, err
			}
		}

		s.Process.ApparmorProfile = appArmorProfile
	}
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Linux.MountLabel = c.MountLabel

	return &s, nil
}

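// clearReadOnly removes the "ro" option from a mount's options, leaving
// all other options intact.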
func clearReadOnly(m *specs.Mount) {
	var opt []string
	for _, o := range m.Options {
		if o != "ro" {
			opt = append(opt, o)
		}
	}
	m.Options = opt
}

// mergeUlimits merges the Ulimits from HostConfig with the daemon
// defaults, and updates HostConfig with the result.
func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
	ulimits := c.Ulimits
	// Merge ulimits with daemon defaults
	ulIdx := make(map[string]struct{})
	for _, ul := range ulimits {
		ulIdx[ul.Name] = struct{}{}
	}
	for name, ul := range daemon.configStore.Ulimits {
		if _, exists := ulIdx[name]; !exists {
			ulimits = append(ulimits, ul)
		}
	}
	c.Ulimits = ulimits
}