github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/oci_linux.go (about)

     1  package daemon
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"regexp"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  
    14  	"github.com/Sirupsen/logrus"
    15  	containertypes "github.com/docker/docker/api/types/container"
    16  	"github.com/docker/docker/container"
    17  	"github.com/docker/docker/daemon/caps"
    18  	daemonconfig "github.com/docker/docker/daemon/config"
    19  	"github.com/docker/docker/oci"
    20  	"github.com/docker/docker/pkg/idtools"
    21  	"github.com/docker/docker/pkg/mount"
    22  	"github.com/docker/docker/pkg/stringutils"
    23  	"github.com/docker/docker/pkg/symlink"
    24  	"github.com/docker/docker/volume"
    25  	"github.com/opencontainers/runc/libcontainer/apparmor"
    26  	"github.com/opencontainers/runc/libcontainer/cgroups"
    27  	"github.com/opencontainers/runc/libcontainer/devices"
    28  	"github.com/opencontainers/runc/libcontainer/user"
    29  	specs "github.com/opencontainers/runtime-spec/specs-go"
    30  )
    31  
    32  var (
    33  	deviceCgroupRuleRegex = regexp.MustCompile("^([acb]) ([0-9]+|\\*):([0-9]+|\\*) ([rwm]{1,3})$")
    34  )
    35  
    36  func setResources(s *specs.Spec, r containertypes.Resources) error {
    37  	weightDevices, err := getBlkioWeightDevices(r)
    38  	if err != nil {
    39  		return err
    40  	}
    41  	readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps)
    42  	if err != nil {
    43  		return err
    44  	}
    45  	writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps)
    46  	if err != nil {
    47  		return err
    48  	}
    49  	readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps)
    50  	if err != nil {
    51  		return err
    52  	}
    53  	writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps)
    54  	if err != nil {
    55  		return err
    56  	}
    57  
    58  	memoryRes := getMemoryResources(r)
    59  	cpuRes, err := getCPUResources(r)
    60  	if err != nil {
    61  		return err
    62  	}
    63  	blkioWeight := r.BlkioWeight
    64  
    65  	specResources := &specs.LinuxResources{
    66  		Memory: memoryRes,
    67  		CPU:    cpuRes,
    68  		BlockIO: &specs.LinuxBlockIO{
    69  			Weight:                  &blkioWeight,
    70  			WeightDevice:            weightDevices,
    71  			ThrottleReadBpsDevice:   readBpsDevice,
    72  			ThrottleWriteBpsDevice:  writeBpsDevice,
    73  			ThrottleReadIOPSDevice:  readIOpsDevice,
    74  			ThrottleWriteIOPSDevice: writeIOpsDevice,
    75  		},
    76  		DisableOOMKiller: r.OomKillDisable,
    77  		Pids: &specs.LinuxPids{
    78  			Limit: r.PidsLimit,
    79  		},
    80  	}
    81  
    82  	if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 {
    83  		specResources.Devices = s.Linux.Resources.Devices
    84  	}
    85  
    86  	s.Linux.Resources = specResources
    87  	return nil
    88  }
    89  
    90  func setDevices(s *specs.Spec, c *container.Container) error {
    91  	// Build lists of devices allowed and created within the container.
    92  	var devs []specs.LinuxDevice
    93  	devPermissions := s.Linux.Resources.Devices
    94  	if c.HostConfig.Privileged {
    95  		hostDevices, err := devices.HostDevices()
    96  		if err != nil {
    97  			return err
    98  		}
    99  		for _, d := range hostDevices {
   100  			devs = append(devs, oci.Device(d))
   101  		}
   102  		devPermissions = []specs.LinuxDeviceCgroup{
   103  			{
   104  				Allow:  true,
   105  				Access: "rwm",
   106  			},
   107  		}
   108  	} else {
   109  		for _, deviceMapping := range c.HostConfig.Devices {
   110  			d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions)
   111  			if err != nil {
   112  				return err
   113  			}
   114  			devs = append(devs, d...)
   115  			devPermissions = append(devPermissions, dPermissions...)
   116  		}
   117  
   118  		for _, deviceCgroupRule := range c.HostConfig.DeviceCgroupRules {
   119  			ss := deviceCgroupRuleRegex.FindAllStringSubmatch(deviceCgroupRule, -1)
   120  			if len(ss[0]) != 5 {
   121  				return fmt.Errorf("invalid device cgroup rule format: '%s'", deviceCgroupRule)
   122  			}
   123  			matches := ss[0]
   124  
   125  			dPermissions := specs.LinuxDeviceCgroup{
   126  				Allow:  true,
   127  				Type:   matches[1],
   128  				Access: matches[4],
   129  			}
   130  			if matches[2] == "*" {
   131  				major := int64(-1)
   132  				dPermissions.Major = &major
   133  			} else {
   134  				major, err := strconv.ParseInt(matches[2], 10, 64)
   135  				if err != nil {
   136  					return fmt.Errorf("invalid major value in device cgroup rule format: '%s'", deviceCgroupRule)
   137  				}
   138  				dPermissions.Major = &major
   139  			}
   140  			if matches[3] == "*" {
   141  				minor := int64(-1)
   142  				dPermissions.Minor = &minor
   143  			} else {
   144  				minor, err := strconv.ParseInt(matches[3], 10, 64)
   145  				if err != nil {
   146  					return fmt.Errorf("invalid minor value in device cgroup rule format: '%s'", deviceCgroupRule)
   147  				}
   148  				dPermissions.Minor = &minor
   149  			}
   150  			devPermissions = append(devPermissions, dPermissions)
   151  		}
   152  	}
   153  
   154  	s.Linux.Devices = append(s.Linux.Devices, devs...)
   155  	s.Linux.Resources.Devices = devPermissions
   156  	return nil
   157  }
   158  
   159  func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error {
   160  	var rlimits []specs.LinuxRlimit
   161  
   162  	// We want to leave the original HostConfig alone so make a copy here
   163  	hostConfig := *c.HostConfig
   164  	// Merge with the daemon defaults
   165  	daemon.mergeUlimits(&hostConfig)
   166  	for _, ul := range hostConfig.Ulimits {
   167  		rlimits = append(rlimits, specs.LinuxRlimit{
   168  			Type: "RLIMIT_" + strings.ToUpper(ul.Name),
   169  			Soft: uint64(ul.Soft),
   170  			Hard: uint64(ul.Hard),
   171  		})
   172  	}
   173  
   174  	s.Process.Rlimits = rlimits
   175  	return nil
   176  }
   177  
   178  func setUser(s *specs.Spec, c *container.Container) error {
   179  	uid, gid, additionalGids, err := getUser(c, c.Config.User)
   180  	if err != nil {
   181  		return err
   182  	}
   183  	s.Process.User.UID = uid
   184  	s.Process.User.GID = gid
   185  	s.Process.User.AdditionalGids = additionalGids
   186  	return nil
   187  }
   188  
   189  func readUserFile(c *container.Container, p string) (io.ReadCloser, error) {
   190  	fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS)
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  	return os.Open(fp)
   195  }
   196  
   197  func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) {
   198  	passwdPath, err := user.GetPasswdPath()
   199  	if err != nil {
   200  		return 0, 0, nil, err
   201  	}
   202  	groupPath, err := user.GetGroupPath()
   203  	if err != nil {
   204  		return 0, 0, nil, err
   205  	}
   206  	passwdFile, err := readUserFile(c, passwdPath)
   207  	if err == nil {
   208  		defer passwdFile.Close()
   209  	}
   210  	groupFile, err := readUserFile(c, groupPath)
   211  	if err == nil {
   212  		defer groupFile.Close()
   213  	}
   214  
   215  	execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile)
   216  	if err != nil {
   217  		return 0, 0, nil, err
   218  	}
   219  
   220  	// todo: fix this double read by a change to libcontainer/user pkg
   221  	groupFile, err = readUserFile(c, groupPath)
   222  	if err == nil {
   223  		defer groupFile.Close()
   224  	}
   225  	var addGroups []int
   226  	if len(c.HostConfig.GroupAdd) > 0 {
   227  		addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile)
   228  		if err != nil {
   229  			return 0, 0, nil, err
   230  		}
   231  	}
   232  	uid := uint32(execUser.Uid)
   233  	gid := uint32(execUser.Gid)
   234  	sgids := append(execUser.Sgids, addGroups...)
   235  	var additionalGids []uint32
   236  	for _, g := range sgids {
   237  		additionalGids = append(additionalGids, uint32(g))
   238  	}
   239  	return uid, gid, additionalGids, nil
   240  }
   241  
   242  func setNamespace(s *specs.Spec, ns specs.LinuxNamespace) {
   243  	for i, n := range s.Linux.Namespaces {
   244  		if n.Type == ns.Type {
   245  			s.Linux.Namespaces[i] = ns
   246  			return
   247  		}
   248  	}
   249  	s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
   250  }
   251  
   252  func setCapabilities(s *specs.Spec, c *container.Container) error {
   253  	var caplist []string
   254  	var err error
   255  	if c.HostConfig.Privileged {
   256  		caplist = caps.GetAllCapabilities()
   257  	} else {
   258  		caplist, err = caps.TweakCapabilities(s.Process.Capabilities.Effective, c.HostConfig.CapAdd, c.HostConfig.CapDrop)
   259  		if err != nil {
   260  			return err
   261  		}
   262  	}
   263  	s.Process.Capabilities.Effective = caplist
   264  	s.Process.Capabilities.Bounding = caplist
   265  	s.Process.Capabilities.Permitted = caplist
   266  	s.Process.Capabilities.Inheritable = caplist
   267  	return nil
   268  }
   269  
// setNamespaces configures the Linux namespaces in the spec according to
// the container's userns/network/ipc/pid/UTS modes. Host modes remove the
// corresponding namespace from the spec; "container:" modes point the
// namespace path at the target container's /proc entry; otherwise a fresh
// private namespace is requested.
func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
	userNS := false
	// user
	if c.HostConfig.UsernsMode.IsPrivate() {
		uidMap := daemon.idMappings.UIDs()
		if uidMap != nil {
			userNS = true
			ns := specs.LinuxNamespace{Type: "user"}
			setNamespace(s, ns)
			s.Linux.UIDMappings = specMapping(uidMap)
			s.Linux.GIDMappings = specMapping(daemon.idMappings.GIDs())
		}
	}
	// network
	if !c.Config.NetworkDisabled {
		ns := specs.LinuxNamespace{Type: "network"}
		parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
		if parts[0] == "container" {
			// Join the network namespace of the referenced container.
			nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
			if err != nil {
				return err
			}
			ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
			if userNS {
				// to share a net namespace, they must also share a user namespace
				nsUser := specs.LinuxNamespace{Type: "user"}
				nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
				setNamespace(s, nsUser)
			}
		} else if c.HostConfig.NetworkMode.IsHost() {
			ns.Path = c.NetworkSettings.SandboxKey
		}
		setNamespace(s, ns)
	}
	// ipc
	if c.HostConfig.IpcMode.IsContainer() {
		ns := specs.LinuxNamespace{Type: "ipc"}
		ic, err := daemon.getIpcContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share an IPC namespace, they must also share a user namespace
			nsUser := specs.LinuxNamespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.IpcMode.IsHost() {
		oci.RemoveNamespace(s, specs.LinuxNamespaceType("ipc"))
	} else {
		// Default: private IPC namespace.
		ns := specs.LinuxNamespace{Type: "ipc"}
		setNamespace(s, ns)
	}
	// pid
	if c.HostConfig.PidMode.IsContainer() {
		ns := specs.LinuxNamespace{Type: "pid"}
		pc, err := daemon.getPidContainer(c)
		if err != nil {
			return err
		}
		ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
		setNamespace(s, ns)
		if userNS {
			// to share a PID namespace, they must also share a user namespace
			nsUser := specs.LinuxNamespace{Type: "user"}
			nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
			setNamespace(s, nsUser)
		}
	} else if c.HostConfig.PidMode.IsHost() {
		oci.RemoveNamespace(s, specs.LinuxNamespaceType("pid"))
	} else {
		// Default: private PID namespace.
		ns := specs.LinuxNamespace{Type: "pid"}
		setNamespace(s, ns)
	}
	// uts
	if c.HostConfig.UTSMode.IsHost() {
		oci.RemoveNamespace(s, specs.LinuxNamespaceType("uts"))
		// The hostname belongs to the host's UTS namespace, so clear it.
		s.Hostname = ""
	}

	return nil
}
   354  
   355  func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping {
   356  	var ids []specs.LinuxIDMapping
   357  	for _, item := range s {
   358  		ids = append(ids, specs.LinuxIDMapping{
   359  			HostID:      uint32(item.HostID),
   360  			ContainerID: uint32(item.ContainerID),
   361  			Size:        uint32(item.Size),
   362  		})
   363  	}
   364  	return ids
   365  }
   366  
   367  func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
   368  	for _, m := range mountinfo {
   369  		if m.Mountpoint == dir {
   370  			return m
   371  		}
   372  	}
   373  	return nil
   374  }
   375  
   376  // Get the source mount point of directory passed in as argument. Also return
   377  // optional fields.
   378  func getSourceMount(source string) (string, string, error) {
   379  	// Ensure any symlinks are resolved.
   380  	sourcePath, err := filepath.EvalSymlinks(source)
   381  	if err != nil {
   382  		return "", "", err
   383  	}
   384  
   385  	mountinfos, err := mount.GetMounts()
   386  	if err != nil {
   387  		return "", "", err
   388  	}
   389  
   390  	mountinfo := getMountInfo(mountinfos, sourcePath)
   391  	if mountinfo != nil {
   392  		return sourcePath, mountinfo.Optional, nil
   393  	}
   394  
   395  	path := sourcePath
   396  	for {
   397  		path = filepath.Dir(path)
   398  
   399  		mountinfo = getMountInfo(mountinfos, path)
   400  		if mountinfo != nil {
   401  			return path, mountinfo.Optional, nil
   402  		}
   403  
   404  		if path == "/" {
   405  			break
   406  		}
   407  	}
   408  
   409  	// If we are here, we did not find parent mount. Something is wrong.
   410  	return "", "", fmt.Errorf("Could not find source mount of %s", source)
   411  }
   412  
   413  // Ensure mount point on which path is mounted, is shared.
   414  func ensureShared(path string) error {
   415  	sharedMount := false
   416  
   417  	sourceMount, optionalOpts, err := getSourceMount(path)
   418  	if err != nil {
   419  		return err
   420  	}
   421  	// Make sure source mount point is shared.
   422  	optsSplit := strings.Split(optionalOpts, " ")
   423  	for _, opt := range optsSplit {
   424  		if strings.HasPrefix(opt, "shared:") {
   425  			sharedMount = true
   426  			break
   427  		}
   428  	}
   429  
   430  	if !sharedMount {
   431  		return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
   432  	}
   433  	return nil
   434  }
   435  
   436  // Ensure mount point on which path is mounted, is either shared or slave.
   437  func ensureSharedOrSlave(path string) error {
   438  	sharedMount := false
   439  	slaveMount := false
   440  
   441  	sourceMount, optionalOpts, err := getSourceMount(path)
   442  	if err != nil {
   443  		return err
   444  	}
   445  	// Make sure source mount point is shared.
   446  	optsSplit := strings.Split(optionalOpts, " ")
   447  	for _, opt := range optsSplit {
   448  		if strings.HasPrefix(opt, "shared:") {
   449  			sharedMount = true
   450  			break
   451  		} else if strings.HasPrefix(opt, "master:") {
   452  			slaveMount = true
   453  			break
   454  		}
   455  	}
   456  
   457  	if !sharedMount && !slaveMount {
   458  		return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
   459  	}
   460  	return nil
   461  }
   462  
var (
	// mountPropagationMap maps user-facing propagation mode names (as
	// given in volume specs) to the mount package's flag constants.
	mountPropagationMap = map[string]int{
		"private":  mount.PRIVATE,
		"rprivate": mount.RPRIVATE,
		"shared":   mount.SHARED,
		"rshared":  mount.RSHARED,
		"slave":    mount.SLAVE,
		"rslave":   mount.RSLAVE,
	}

	// mountPropagationReverseMap is the inverse of mountPropagationMap,
	// used to render a flag back into the option string for the spec.
	mountPropagationReverseMap = map[int]string{
		mount.PRIVATE:  "private",
		mount.RPRIVATE: "rprivate",
		mount.SHARED:   "shared",
		mount.RSHARED:  "rshared",
		mount.SLAVE:    "slave",
		mount.RSLAVE:   "rslave",
	}
)
   482  
// setMounts installs the container's mounts into the spec: it drops
// default spec mounts that user mounts override, expands tmpfs mounts,
// creates bind mounts with validated propagation flags (adjusting the
// rootfs propagation when needed), and applies read-only and
// privileged-mode adjustments to the final mount list.
func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error {
	// Index the user-supplied mounts by destination for quick lookup.
	userMounts := make(map[string]struct{})
	for _, m := range mounts {
		userMounts[m.Destination] = struct{}{}
	}

	// Filter out mounts that are overridden by user supplied mounts
	var defaultMounts []specs.Mount
	_, mountDev := userMounts["/dev"]
	for _, m := range s.Mounts {
		if _, ok := userMounts[m.Destination]; !ok {
			// A user mount on /dev supersedes the default /dev/* mounts.
			if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
				continue
			}
			defaultMounts = append(defaultMounts, m)
		}
	}

	s.Mounts = defaultMounts
	for _, m := range mounts {
		// Two user mounts on the same destination is a configuration error.
		for _, cm := range s.Mounts {
			if cm.Destination == m.Destination {
				return fmt.Errorf("Duplicate mount point '%s'", m.Destination)
			}
		}

		if m.Source == "tmpfs" {
			data := m.Data
			options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)}
			if data != "" {
				options = append(options, strings.Split(data, ",")...)
			}

			// MergeTmpfsOptions resolves conflicting tmpfs flags, letting
			// the user-supplied data override the defaults above.
			merged, err := mount.MergeTmpfsOptions(options)
			if err != nil {
				return err
			}

			s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged})
			continue
		}

		mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"}

		// Determine property of RootPropagation based on volume
		// properties. If a volume is shared, then keep root propagation
		// shared. This should work for slave and private volumes too.
		//
		// For slave volumes, it can be either [r]shared/[r]slave.
		//
		// For private volumes any root propagation value should work.
		pFlag := mountPropagationMap[m.Propagation]
		if pFlag == mount.SHARED || pFlag == mount.RSHARED {
			if err := ensureShared(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED]
			}
		} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
			if err := ensureSharedOrSlave(m.Source); err != nil {
				return err
			}
			rootpg := mountPropagationMap[s.Linux.RootfsPropagation]
			if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
				s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE]
			}
		}

		opts := []string{"rbind"}
		if !m.Writable {
			opts = append(opts, "ro")
		}
		if pFlag != 0 {
			opts = append(opts, mountPropagationReverseMap[pFlag])
		}

		mt.Options = opts
		s.Mounts = append(s.Mounts, mt)
	}

	if s.Root.Readonly {
		// Propagate the read-only rootfs to all non-user mounts, except
		// those the runtime remounts itself.
		for i, m := range s.Mounts {
			switch m.Destination {
			case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc
				continue
			}
			if _, ok := userMounts[m.Destination]; !ok {
				if !stringutils.InSlice(m.Options, "ro") {
					s.Mounts[i].Options = append(s.Mounts[i].Options, "ro")
				}
			}
		}
	}

	if c.HostConfig.Privileged {
		if !s.Root.Readonly {
			// clear readonly for /sys
			for i := range s.Mounts {
				if s.Mounts[i].Destination == "/sys" {
					clearReadOnly(&s.Mounts[i])
				}
			}
		}
		// Privileged containers get unrestricted access to all paths.
		s.Linux.ReadonlyPaths = nil
		s.Linux.MaskedPaths = nil
	}

	// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
	// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
	if uidMap := daemon.idMappings.UIDs(); uidMap != nil || c.HostConfig.Privileged {
		for i, m := range s.Mounts {
			if m.Type == "cgroup" {
				clearReadOnly(&s.Mounts[i])
			}
		}
	}

	return nil
}
   604  
// populateCommonSpec fills in the parts of the spec shared by all
// containers: the root filesystem, working directory, process arguments
// (optionally prefixed with the docker-init binary), environment,
// terminal flag, and hostname.
func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
	linkedEnv, err := daemon.setupLinkedContainers(c)
	if err != nil {
		return err
	}
	s.Root = specs.Root{
		Path:     c.BaseFS,
		Readonly: c.HostConfig.ReadonlyRootfs,
	}
	if err := c.SetupWorkingDirectory(daemon.idMappings.RootPair()); err != nil {
		return err
	}
	cwd := c.Config.WorkingDir
	if len(cwd) == 0 {
		cwd = "/"
	}
	s.Process.Args = append([]string{c.Path}, c.Args...)

	// only add the custom init if it is specified and the container is running in its
	// own private pid namespace.  It does not make sense to add if it is running in the
	// host namespace or another container's pid namespace where we already have an init
	if c.HostConfig.PidMode.IsPrivate() {
		// Per-container Init setting overrides the daemon default.
		if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
			(c.HostConfig.Init == nil && daemon.configStore.Init) {
			s.Process.Args = append([]string{"/dev/init", "--", c.Path}, c.Args...)
			var path string
			// With no configured InitPath, look up the default init binary
			// on the daemon host's PATH; otherwise use the configured path.
			if daemon.configStore.InitPath == "" {
				path, err = exec.LookPath(daemonconfig.DefaultInitBinary)
				if err != nil {
					return err
				}
			}
			if daemon.configStore.InitPath != "" {
				path = daemon.configStore.InitPath
			}
			// Bind-mount the init binary to /dev/init inside the container.
			s.Mounts = append(s.Mounts, specs.Mount{
				Destination: "/dev/init",
				Type:        "bind",
				Source:      path,
				Options:     []string{"bind", "ro"},
			})
		}
	}
	s.Process.Cwd = cwd
	s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
	s.Process.Terminal = c.Config.Tty
	s.Hostname = c.FullHostname()

	return nil
}
   655  
   656  func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) {
   657  	s := oci.DefaultSpec()
   658  	if err := daemon.populateCommonSpec(&s, c); err != nil {
   659  		return nil, err
   660  	}
   661  
   662  	var cgroupsPath string
   663  	scopePrefix := "docker"
   664  	parent := "/docker"
   665  	useSystemd := UsingSystemd(daemon.configStore)
   666  	if useSystemd {
   667  		parent = "system.slice"
   668  	}
   669  
   670  	if c.HostConfig.CgroupParent != "" {
   671  		parent = c.HostConfig.CgroupParent
   672  	} else if daemon.configStore.CgroupParent != "" {
   673  		parent = daemon.configStore.CgroupParent
   674  	}
   675  
   676  	if useSystemd {
   677  		cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
   678  		logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
   679  	} else {
   680  		cgroupsPath = filepath.Join(parent, c.ID)
   681  	}
   682  	s.Linux.CgroupsPath = cgroupsPath
   683  
   684  	if err := setResources(&s, c.HostConfig.Resources); err != nil {
   685  		return nil, fmt.Errorf("linux runtime spec resources: %v", err)
   686  	}
   687  	s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj
   688  	s.Linux.Sysctl = c.HostConfig.Sysctls
   689  
   690  	p := s.Linux.CgroupsPath
   691  	if useSystemd {
   692  		initPath, err := cgroups.GetInitCgroup("cpu")
   693  		if err != nil {
   694  			return nil, err
   695  		}
   696  		p, _ = cgroups.GetOwnCgroup("cpu")
   697  		if err != nil {
   698  			return nil, err
   699  		}
   700  		p = filepath.Join(initPath, p)
   701  	}
   702  
   703  	// Clean path to guard against things like ../../../BAD
   704  	parentPath := filepath.Dir(p)
   705  	if !filepath.IsAbs(parentPath) {
   706  		parentPath = filepath.Clean("/" + parentPath)
   707  	}
   708  
   709  	if err := daemon.initCgroupsPath(parentPath); err != nil {
   710  		return nil, fmt.Errorf("linux init cgroups path: %v", err)
   711  	}
   712  	if err := setDevices(&s, c); err != nil {
   713  		return nil, fmt.Errorf("linux runtime spec devices: %v", err)
   714  	}
   715  	if err := setRlimits(daemon, &s, c); err != nil {
   716  		return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
   717  	}
   718  	if err := setUser(&s, c); err != nil {
   719  		return nil, fmt.Errorf("linux spec user: %v", err)
   720  	}
   721  	if err := setNamespaces(daemon, &s, c); err != nil {
   722  		return nil, fmt.Errorf("linux spec namespaces: %v", err)
   723  	}
   724  	if err := setCapabilities(&s, c); err != nil {
   725  		return nil, fmt.Errorf("linux spec capabilities: %v", err)
   726  	}
   727  	if err := setSeccomp(daemon, &s, c); err != nil {
   728  		return nil, fmt.Errorf("linux seccomp: %v", err)
   729  	}
   730  
   731  	if err := daemon.setupIpcDirs(c); err != nil {
   732  		return nil, err
   733  	}
   734  
   735  	if err := daemon.setupSecretDir(c); err != nil {
   736  		return nil, err
   737  	}
   738  
   739  	if err := daemon.setupConfigDir(c); err != nil {
   740  		return nil, err
   741  	}
   742  
   743  	ms, err := daemon.setupMounts(c)
   744  	if err != nil {
   745  		return nil, err
   746  	}
   747  
   748  	ms = append(ms, c.IpcMounts()...)
   749  
   750  	tmpfsMounts, err := c.TmpfsMounts()
   751  	if err != nil {
   752  		return nil, err
   753  	}
   754  	ms = append(ms, tmpfsMounts...)
   755  
   756  	if m := c.SecretMounts(); m != nil {
   757  		ms = append(ms, m...)
   758  	}
   759  
   760  	ms = append(ms, c.ConfigMounts()...)
   761  
   762  	sort.Sort(mounts(ms))
   763  	if err := setMounts(daemon, &s, c, ms); err != nil {
   764  		return nil, fmt.Errorf("linux mounts: %v", err)
   765  	}
   766  
   767  	for _, ns := range s.Linux.Namespaces {
   768  		if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
   769  			target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe"))
   770  			if err != nil {
   771  				return nil, err
   772  			}
   773  
   774  			s.Hooks = &specs.Hooks{
   775  				Prestart: []specs.Hook{{
   776  					Path: target, // FIXME: cross-platform
   777  					Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()},
   778  				}},
   779  			}
   780  		}
   781  	}
   782  
   783  	if apparmor.IsEnabled() {
   784  		var appArmorProfile string
   785  		if c.AppArmorProfile != "" {
   786  			appArmorProfile = c.AppArmorProfile
   787  		} else if c.HostConfig.Privileged {
   788  			appArmorProfile = "unconfined"
   789  		} else {
   790  			appArmorProfile = "docker-default"
   791  		}
   792  
   793  		if appArmorProfile == "docker-default" {
   794  			// Unattended upgrades and other fun services can unload AppArmor
   795  			// profiles inadvertently. Since we cannot store our profile in
   796  			// /etc/apparmor.d, nor can we practically add other ways of
   797  			// telling the system to keep our profile loaded, in order to make
   798  			// sure that we keep the default profile enabled we dynamically
   799  			// reload it if necessary.
   800  			if err := ensureDefaultAppArmorProfile(); err != nil {
   801  				return nil, err
   802  			}
   803  		}
   804  
   805  		s.Process.ApparmorProfile = appArmorProfile
   806  	}
   807  	s.Process.SelinuxLabel = c.GetProcessLabel()
   808  	s.Process.NoNewPrivileges = c.NoNewPrivileges
   809  	s.Linux.MountLabel = c.MountLabel
   810  
   811  	return (*specs.Spec)(&s), nil
   812  }
   813  
   814  func clearReadOnly(m *specs.Mount) {
   815  	var opt []string
   816  	for _, o := range m.Options {
   817  		if o != "ro" {
   818  			opt = append(opt, o)
   819  		}
   820  	}
   821  	m.Options = opt
   822  }
   823  
   824  // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig
   825  func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) {
   826  	ulimits := c.Ulimits
   827  	// Merge ulimits with daemon defaults
   828  	ulIdx := make(map[string]struct{})
   829  	for _, ul := range ulimits {
   830  		ulIdx[ul.Name] = struct{}{}
   831  	}
   832  	for name, ul := range daemon.configStore.Ulimits {
   833  		if _, exists := ulIdx[name]; !exists {
   834  			ulimits = append(ulimits, ul)
   835  		}
   836  	}
   837  	c.Ulimits = ulimits
   838  }