// github.com/endophage/docker@v1.4.2-0.20161027011718-242853499895/daemon/oci_linux.go

package daemon

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"github.com/Sirupsen/logrus"
	containertypes "github.com/docker/docker/api/types/container"
	"github.com/docker/docker/container"
	"github.com/docker/docker/daemon/caps"
	"github.com/docker/docker/oci"
	"github.com/docker/docker/pkg/idtools"
	"github.com/docker/docker/pkg/mount"
	"github.com/docker/docker/pkg/stringutils"
	"github.com/docker/docker/pkg/symlink"
	"github.com/docker/docker/volume"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/devices"
	"github.com/opencontainers/runc/libcontainer/user"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

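// setResources translates the container's resource limits (block I/O, memory,
// CPU, and pids) into the Linux resources section of the runtime spec,
// preserving any device cgroup rules already present on the spec.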
func setResources(s *specs.Spec, r containertypes.Resources) error {
	weightDevices, err := getBlkioWeightDevices(r)
	if err != nil {
		return err
	}
	readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
	if err != nil {
		return err
	}
	writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
	if err != nil {
		return err
	}
	readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
	if err != nil {
		return err
	}
	writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
	if err != nil {
		return err
	}

	memoryRes := getMemoryResources(r)
	cpuRes := getCPUResources(r)
	blkioWeight := r.BlkioWeight

	specResources := &specs.Resources{
		Memory: memoryRes,
		CPU:    cpuRes,
		BlockIO: &specs.BlockIO{
			Weight:                  &blkioWeight,
			WeightDevice:            weightDevices,
			ThrottleReadBpsDevice:   readBpsDevice,
			ThrottleWriteBpsDevice:  writeBpsDevice,
			ThrottleReadIOPSDevice:  readIOpsDevice,
			ThrottleWriteIOPSDevice: writeIOpsDevice,
		},
		DisableOOMKiller: r.OomKillDisable,
		Pids: &specs.Pids{
			Limit: &r.PidsLimit,
		},
	}

	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
		specResources.Devices = s.Linux.Resources.Devices
	}

	s.Linux.Resources = specResources
	return nil
}

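// setDevices populates the spec with the devices that should be visible inside
// the container, along with their device cgroup permissions. A privileged
// container gets every host device with "rwm" access.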
func setDevices(s *specs.Spec, c *container.Container) error {
	// Build lists of devices allowed and created within the container.
	var devs []specs.Device
	devPermissions := s.Linux.Resources.Devices
	if c.HostConfig.Privileged {
		hostDevices, err := devices.HostDevices()
		if err != nil {
			return err
		}
		for _, d := range hostDevices {
			devs = append(devs, specDevice(d))
		}
		rwm := "rwm"
		devPermissions = []specs.DeviceCgroup{
			{
				Allow:  true,
				Access: &rwm,
			},
		}
	} else {
		for _, deviceMapping := range c.HostConfig.Devices {
			d, dPermissions, err := getDevicesFromPath(deviceMapping)
			if err != nil {
				return err
			}
			devs = append(devs, d...)
			devPermissions = append(devPermissions, dPermissions...)
		}
	}

	s.Linux.Devices = append(s.Linux.Devices, devs...)
	s.Linux.Resources.Devices = devPermissions
	return nil
}

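// setRlimits applies the container's ulimits, merged with the daemon defaults,
// to the spec process; e.g. a "nofile" ulimit becomes RLIMIT_NOFILE.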
func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	var rlimits []specs.Rlimit

	// We want to leave the original HostConfig alone, so make a copy here
	hostConfig := *c.HostConfig
	// Merge with the daemon defaults
	daemon.mergeUlimits(&hostConfig)
	for _, ul := range hostConfig.Ulimits {
		rlimits = append(rlimits, specs.Rlimit{
			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
			Soft: uint64(ul.Soft),
			Hard: uint64(ul.Hard),
		})
	}

	s.Process.Rlimits = rlimits
	return nil
}

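// setUser resolves the container's configured user to numeric IDs and sets
// them on the spec process.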
func setUser(s *specs.Spec, c *container.Container) error {
	uid, gid, additionalGids, err := getUser(c, c.Config.User)
	if err != nil {
		return err
	}
	s.Process.User.UID = uid
	s.Process.User.GID = gid
	s.Process.User.AdditionalGids = additionalGids
	return nil
}

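// readUserFile opens a file such as /etc/passwd or /etc/group from within the
// container's root filesystem, resolving symlinks without escaping that scope.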
func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
	if err != nil {
		return nil, err
	}
	return os.Open(fp)
}

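// getUser resolves username against the container's passwd and group files and
// returns the UID, GID, and additional group IDs, including any groups
// requested via HostConfig.GroupAdd.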
func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
	passwdPath, err := user.GetPasswdPath()
	if err != nil {
		return 0, 0, nil, err
	}
	groupPath, err := user.GetGroupPath()
	if err != nil {
		return 0, 0, nil, err
	}
	passwdFile, err := readUserFile(c, passwdPath)
	if err == nil {
		defer passwdFile.Close()
	}
	groupFile, err := readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}

	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
	if err != nil {
		return 0, 0, nil, err
	}

	// TODO: fix this double read with a change to the libcontainer/user pkg
	groupFile, err = readUserFile(c, groupPath)
	if err == nil {
		defer groupFile.Close()
	}
	var addGroups []int
	if len(c.HostConfig.GroupAdd) > 0 {
		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
		if err != nil {
			return 0, 0, nil, err
		}
	}
	uid := uint32(execUser.Uid)
	gid := uint32(execUser.Gid)
	sgids := append(execUser.Sgids, addGroups...)
	var additionalGids []uint32
	for _, g := range sgids {
		additionalGids = append(additionalGids, uint32(g))
	}
	return uid, gid, additionalGids, nil
}

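// setNamespace adds ns to the spec, replacing an existing namespace of the
// same type if one is already present.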
func setNamespace(s *specs.Spec, ns specs.Namespace) {
	for i, n := range s.Linux.Namespaces {
		if n.Type == ns.Type {
			s.Linux.Namespaces[i] = ns
			return
		}
	}
	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
}

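// setCapabilities computes the container's capability set: all capabilities
// for privileged containers, otherwise the default set adjusted by CapAdd and
// CapDrop.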
func setCapabilities(s *specs.Spec, c *container.Container) error {
	var caplist []string
	var err error
	if c.HostConfig.Privileged {
		caplist = caps.GetAllCapabilities()
	} else {
		caplist, err = caps.TweakCapabilities(s.Process.Capabilities, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
		if err != nil {
			return err
		}
	}
	s.Process.Capabilities = caplist
	return nil
}

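// delNamespace removes the namespace of the given type from the spec, if
// present.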
func delNamespace(s *specs.Spec, nsType specs.NamespaceType) {
	idx := -1
	for i, n := range s.Linux.Namespaces {
		if n.Type == nsType {
			idx = i
		}
	}
	if idx >= 0 {
		s.Linux.Namespaces = append(s.Linux.Namespaces[:idx], s.Linux.Namespaces[idx+1:]...)
	}
}

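// setNamespaces configures the user, network, ipc, pid, and uts namespaces for
// the container, joining another container's namespaces where the host config
// requests it. When user namespaces are in use, joining another container's
// namespace also requires joining its user namespace.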
func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	userNS := false
	// user
	if c.HostConfig.UsernsMode.IsPrivate() {
		uidMap, gidMap := daemon.GetUIDGIDMaps()
		if uidMap != nil {
			userNS = true
			ns := specs.Namespace{Type: "user"}
			setNamespace(s, ns)
			s.Linux.UIDMappings = specMapping(uidMap)
			s.Linux.GIDMappings = specMapping(gidMap)
		}
	}
	// network
	if !c.Config.NetworkDisabled {
		ns := specs.Namespace{Type: "network"}
		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
		if parts[0] == "container" {
			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
			if err != nil {
				return err
			}
			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
			if userNS {
				// to share a net namespace, they must also share a user namespace
				nsUser := specs.Namespace{Type: "user"}
				nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
				setNamespace(s, nsUser)
			}
		} else if c.HostConfig.NetworkMode.IsHost() {
			ns.Path = c.NetworkSettings.SandboxKey
		}
		setNamespace(s, ns)
	}
	// ipc
	if c.HostConfig.IpcMode.IsContainer() {
		ns := specs.Namespace{Type: "ipc"}
		ic, err := daemon.getIpcContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share an IPC namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.IpcMode.IsHost() {
		delNamespace(s, specs.NamespaceType("ipc"))
	} else {
		ns := specs.Namespace{Type: "ipc"}
		setNamespace(s, ns)
	}
	// pid
	if c.HostConfig.PidMode.IsContainer() {
		ns := specs.Namespace{Type: "pid"}
		pc, err := daemon.getPidContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share a PID namespace, they must also share a user namespace
			nsUser := specs.Namespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.PidMode.IsHost() {
		delNamespace(s, specs.NamespaceType("pid"))
	} else {
		ns := specs.Namespace{Type: "pid"}
		setNamespace(s, ns)
	}
	// uts
	if c.HostConfig.UTSMode.IsHost() {
		delNamespace(s, specs.NamespaceType("uts"))
		s.Hostname = ""
	}

	return nil
}

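// specMapping converts the daemon's ID mappings into the runtime spec's
// representation.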
func specMapping(s []idtools.IDMap) []specs.IDMapping {
	var ids []specs.IDMapping
	for _, item := range s {
		ids = append(ids, specs.IDMapping{
			HostID:      uint32(item.HostID),
			ContainerID: uint32(item.ContainerID),
			Size:        uint32(item.Size),
		})
	}
	return ids
}

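// getMountInfo returns the entry in mountinfo whose mountpoint is dir, or nil
// if none matches.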
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
	for _, m := range mountinfo {
		if m.Mountpoint == dir {
			return m
		}
	}
	return nil
}

// getSourceMount returns the source mount point of the directory passed in as
// an argument, along with its optional mount fields.
func getSourceMount(source string) (string, string, error) {
	// Ensure any symlinks are resolved.
	sourcePath, err := filepath.EvalSymlinks(source)
	if err != nil {
		return "", "", err
	}

	mountinfos, err := mount.GetMounts()
	if err != nil {
		return "", "", err
	}

	mountinfo := getMountInfo(mountinfos, sourcePath)
	if mountinfo != nil {
		return sourcePath, mountinfo.Optional, nil
	}

	path := sourcePath
	for {
		path = filepath.Dir(path)

		mountinfo = getMountInfo(mountinfos, path)
		if mountinfo != nil {
			return path, mountinfo.Optional, nil
		}

		if path == "/" {
			break
		}
	}

	// If we are here, we did not find the parent mount. Something is wrong.
	return "", "", fmt.Errorf("Could not find source mount of %s", source)
}

// ensureShared checks that the mount point on which path is mounted is shared.
func ensureShared(path string) error {
	sharedMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure the source mount point is shared.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		}
	}

	if !sharedMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
	}
	return nil
}

// ensureSharedOrSlave checks that the mount point on which path is mounted is
// either shared or a slave.
func ensureSharedOrSlave(path string) error {
	sharedMount := false
	slaveMount := false

	sourceMount, optionalOpts, err := getSourceMount(path)
	if err != nil {
		return err
	}
	// Make sure the source mount point is shared or a slave.
	optsSplit := strings.Split(optionalOpts, " ")
	for _, opt := range optsSplit {
		if strings.HasPrefix(opt, "shared:") {
			sharedMount = true
			break
		} else if strings.HasPrefix(opt, "master:") {
			slaveMount = true
			break
		}
	}

	if !sharedMount && !slaveMount {
		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
	}
	return nil
}

var (
	mountPropagationMap = map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	mountPropagationReverseMap = map[int]string{
		mount.PRIVATE:  "private",
		mount.RPRIVATE: "rprivate",
		mount.SHARED:   "shared",
		mount.RSHARED:  "rshared",
		mount.SLAVE:    "slave",
		mount.RSLAVE:   "rslave",
	}
)

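// setMounts merges the user-supplied mounts into the spec: default mounts that
// are overridden are dropped, shared/slave propagation requests are validated
// against the host, and "ro" is enforced where the root filesystem is
// read-only.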
func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
	userMounts := make(map[string]struct{})
	for _, m := range mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Filter out mounts that are overridden by user-supplied mounts
	var defaultMounts []specs.Mount
	_, mountDev := userMounts["/dev"]
	for _, m := range s.Mounts {
		if _, ok := userMounts[m.Destination]; !ok {
			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
				continue
			}
			defaultMounts = append(defaultMounts, m)
		}
	}

	s.Mounts = defaultMounts
	for _, m := range mounts {
		for _, cm := range s.Mounts {
			if cm.Destination == m.Destination {
				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			data := c.HostConfig.Tmpfs[m.Destination]
			options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)}
			if data != "" {
				options = append(options, strings.Split(data, ",")...)
			}

			merged, err := mount.MergeTmpfsOptions(options)
			if err != nil {
				return err
			}

			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
			continue
		}

		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}

		// Determine the RootfsPropagation setting based on volume
		// properties. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, it can be either [r]shared/[r]slave.
		//
		// For private volumes any root propagation value should work.
		pFlag := mountPropagationMap[m.Propagation]
		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
			}
		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
			if err := ensureSharedOrSlave(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
			}
		}

		opts := []string{"rbind"}
		if !m.Writable {
			opts = append(opts, "ro")
		}
		if pFlag != 0 {
			opts = append(opts, mountPropagationReverseMap[pFlag])
		}

		mt.Options = opts
		s.Mounts = append(s.Mounts, mt)
	}

	if s.Root.Readonly {
		for i, m := range s.Mounts {
			switch m.Destination {
			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
				continue
			}
			if _, ok := userMounts[m.Destination]; !ok {
				if !stringutils.InSlice(m.Options, "ro") {
					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
				}
			}
		}
	}

	if c.HostConfig.Privileged {
		if !s.Root.Readonly {
			// clear readonly for /sys
			for i := range s.Mounts {
				if s.Mounts[i].Destination == "/sys" {
					clearReadOnly(&s.Mounts[i])
				}
			}
		}
		s.Linux.ReadonlyPaths = nil
		s.Linux.MaskedPaths = nil
	}

	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	if uidMap, _ := daemon.GetUIDGIDMaps(); uidMap != nil || c.HostConfig.Privileged {
		for i, m := range s.Mounts {
			if m.Type == "cgroup" {
				clearReadOnly(&s.Mounts[i])
			}
		}
	}

	return nil
}

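// populateCommonSpec fills in the parts of the spec shared across platforms:
// the root filesystem, process arguments and environment, working directory,
// hostname, and the optional docker-init shim.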
func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
	linkedEnv, err := daemon.setupLinkedContainers(c)
	if err != nil {
		return err
	}
	s.Root = specs.Root{
		Path:     c.BaseFS,
		Readonly: c.HostConfig.ReadonlyRootfs,
	}
	rootUID, rootGID := daemon.GetRemappedUIDGID()
	if err := c.SetupWorkingDirectory(rootUID, rootGID); err != nil {
		return err
	}
	cwd := c.Config.WorkingDir
	if len(cwd) == 0 {
		cwd = "/"
	}
	s.Process.Args = append([]string{c.Path}, c.Args...)

	// Only add the custom init if it is specified and the container is running in
	// its own private pid namespace. It does not make sense to add it if the
	// container is running in the host namespace or another container's pid
	// namespace, where we already have an init.
	if c.HostConfig.PidMode.IsPrivate() {
		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
			(c.HostConfig.Init == nil && daemon.configStore.Init) {
			s.Process.Args = append([]string{"/dev/init", c.Path}, c.Args...)
			var path string
			if daemon.configStore.InitPath == "" && c.HostConfig.InitPath == "" {
				path, err = exec.LookPath("docker-init")
				if err != nil {
					return err
				}
			}
			if daemon.configStore.InitPath != "" {
				path = daemon.configStore.InitPath
			}
			if c.HostConfig.InitPath != "" {
				path = c.HostConfig.InitPath
			}
			s.Mounts = append(s.Mounts, specs.Mount{
				Destination: "/dev/init",
				Type:        "bind",
				Source:      path,
				Options:     []string{"bind", "ro"},
			})
		}
	}
	s.Process.Cwd = cwd
	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
	s.Process.Terminal = c.Config.Tty
	s.Hostname = c.FullHostname()

	return nil
}

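// createSpec builds the complete OCI runtime spec for the container from the
// default spec, the container's host config, and the daemon configuration.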
func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
	s := oci.DefaultSpec()
	if err := daemon.populateCommonSpec(&s, c); err != nil {
		return nil, err
	}

	var cgroupsPath string
	scopePrefix := "docker"
	parent := "/docker"
	useSystemd := UsingSystemd(daemon.configStore)
	if useSystemd {
		parent = "system.slice"
	}

	if c.HostConfig.CgroupParent != "" {
		parent = c.HostConfig.CgroupParent
	} else if daemon.configStore.CgroupParent != "" {
		parent = daemon.configStore.CgroupParent
	}

	if useSystemd {
		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
	} else {
		cgroupsPath = filepath.Join(parent, c.ID)
	}
	s.Linux.CgroupsPath = &cgroupsPath

	if err := setResources(&s, c.HostConfig.Resources); err != nil {
		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
	}
	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
	s.Linux.Sysctl = c.HostConfig.Sysctls

	p := *s.Linux.CgroupsPath
	if useSystemd {
		initPath, err := cgroups.GetInitCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		p, err = cgroups.GetThisCgroupDir("cpu")
		if err != nil {
			return nil, err
		}
		p = filepath.Join(initPath, p)
	}

	// Clean path to guard against things like ../../../BAD
	parentPath := filepath.Dir(p)
	if !filepath.IsAbs(parentPath) {
		parentPath = filepath.Clean("/" + parentPath)
	}

	if err := daemon.initCgroupsPath(parentPath); err != nil {
		return nil, fmt.Errorf("linux init cgroups path: %v", err)
	}
	if err := setDevices(&s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
	}
	if err := setRlimits(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
	}
	if err := setUser(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec user: %v", err)
	}
	if err := setNamespaces(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux spec namespaces: %v", err)
	}
	if err := setCapabilities(&s, c); err != nil {
		return nil, fmt.Errorf("linux spec capabilities: %v", err)
	}
	if err := setSeccomp(daemon, &s, c); err != nil {
		return nil, fmt.Errorf("linux seccomp: %v", err)
	}

	if err := daemon.setupIpcDirs(c); err != nil {
		return nil, err
	}

	ms, err := daemon.setupMounts(c)
	if err != nil {
		return nil, err
	}
	ms = append(ms, c.IpcMounts()...)
	ms = append(ms, c.TmpfsMounts()...)
	sort.Sort(mounts(ms))
	if err := setMounts(daemon, &s, c, ms); err != nil {
		return nil, fmt.Errorf("linux mounts: %v", err)
	}

	for _, ns := range s.Linux.Namespaces {
		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
			if err != nil {
				return nil, err
			}

			s.Hooks = specs.Hooks{
				Prestart: []specs.Hook{{
					Path: target, // FIXME: cross-platform
					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
				}},
			}
		}
	}

	if apparmor.IsEnabled() {
		appArmorProfile := "docker-default"
		if len(c.AppArmorProfile) > 0 {
			appArmorProfile = c.AppArmorProfile
		} else if c.HostConfig.Privileged {
			appArmorProfile = "unconfined"
		}
		s.Process.ApparmorProfile = appArmorProfile
	}
	s.Process.SelinuxLabel = c.GetProcessLabel()
	s.Process.NoNewPrivileges = c.NoNewPrivileges
	s.Linux.MountLabel = c.MountLabel

	return (*specs.Spec)(&s), nil
}

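// clearReadOnly removes the "ro" option from a mount.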
func clearReadOnly(m *specs.Mount) {
	var opt []string
	for _, o := range m.Options {
		if o != "ro" {
			opt = append(opt, o)
		}
	}
	m.Options = opt
}

// mergeUlimits merges the Ulimits from HostConfig with the daemon defaults and
// updates HostConfig in place.
func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
	ulimits := c.Ulimits
	// Merge ulimits with daemon defaults
	ulIdx := make(map[string]struct{})
	for _, ul := range ulimits {
		ulIdx[ul.Name] = struct{}{}
	}
	for name, ul := range daemon.configStore.Ulimits {
		if _, exists := ulIdx[name]; !exists {
			ulimits = append(ulimits, ul)
		}
	}
	c.Ulimits = ulimits
}