github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/oci_linux.go (about) 1 package daemon 2 3 import ( 4 "fmt" 5 "io" 6 "os" 7 "os/exec" 8 "path/filepath" 9 "regexp" 10 "sort" 11 "strconv" 12 "strings" 13 14 "github.com/Sirupsen/logrus" 15 containertypes "github.com/docker/docker/api/types/container" 16 "github.com/docker/docker/container" 17 "github.com/docker/docker/daemon/caps" 18 daemonconfig "github.com/docker/docker/daemon/config" 19 "github.com/docker/docker/oci" 20 "github.com/docker/docker/pkg/idtools" 21 "github.com/docker/docker/pkg/mount" 22 "github.com/docker/docker/pkg/stringutils" 23 "github.com/docker/docker/pkg/symlink" 24 "github.com/docker/docker/volume" 25 "github.com/opencontainers/runc/libcontainer/apparmor" 26 "github.com/opencontainers/runc/libcontainer/cgroups" 27 "github.com/opencontainers/runc/libcontainer/devices" 28 "github.com/opencontainers/runc/libcontainer/user" 29 specs "github.com/opencontainers/runtime-spec/specs-go" 30 ) 31 32 var ( 33 deviceCgroupRuleRegex = regexp.MustCompile("^([acb]) ([0-9]+|\\*):([0-9]+|\\*) ([rwm]{1,3})$") 34 ) 35 36 func setResources(s *specs.Spec, r containertypes.Resources) error { 37 weightDevices, err := getBlkioWeightDevices(r) 38 if err != nil { 39 return err 40 } 41 readBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadBps) 42 if err != nil { 43 return err 44 } 45 writeBpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteBps) 46 if err != nil { 47 return err 48 } 49 readIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceReadIOps) 50 if err != nil { 51 return err 52 } 53 writeIOpsDevice, err := getBlkioThrottleDevices(r.BlkioDeviceWriteIOps) 54 if err != nil { 55 return err 56 } 57 58 memoryRes := getMemoryResources(r) 59 cpuRes, err := getCPUResources(r) 60 if err != nil { 61 return err 62 } 63 blkioWeight := r.BlkioWeight 64 65 specResources := &specs.LinuxResources{ 66 Memory: memoryRes, 67 CPU: cpuRes, 68 BlockIO: &specs.LinuxBlockIO{ 69 Weight: &blkioWeight, 70 WeightDevice: weightDevices, 71 ThrottleReadBpsDevice: readBpsDevice, 72 ThrottleWriteBpsDevice: writeBpsDevice, 73 ThrottleReadIOPSDevice: readIOpsDevice, 74 ThrottleWriteIOPSDevice: writeIOpsDevice, 75 }, 76 DisableOOMKiller: r.OomKillDisable, 77 Pids: &specs.LinuxPids{ 78 Limit: r.PidsLimit, 79 }, 80 } 81 82 if s.Linux.Resources != nil && len(s.Linux.Resources.Devices) > 0 { 83 specResources.Devices = s.Linux.Resources.Devices 84 } 85 86 s.Linux.Resources = specResources 87 return nil 88 } 89 90 func setDevices(s *specs.Spec, c *container.Container) error { 91 // Build lists of devices allowed and created within the container. 92 var devs []specs.LinuxDevice 93 devPermissions := s.Linux.Resources.Devices 94 if c.HostConfig.Privileged { 95 hostDevices, err := devices.HostDevices() 96 if err != nil { 97 return err 98 } 99 for _, d := range hostDevices { 100 devs = append(devs, oci.Device(d)) 101 } 102 devPermissions = []specs.LinuxDeviceCgroup{ 103 { 104 Allow: true, 105 Access: "rwm", 106 }, 107 } 108 } else { 109 for _, deviceMapping := range c.HostConfig.Devices { 110 d, dPermissions, err := oci.DevicesFromPath(deviceMapping.PathOnHost, deviceMapping.PathInContainer, deviceMapping.CgroupPermissions) 111 if err != nil { 112 return err 113 } 114 devs = append(devs, d...) 115 devPermissions = append(devPermissions, dPermissions...) 116 } 117 118 for _, deviceCgroupRule := range c.HostConfig.DeviceCgroupRules { 119 ss := deviceCgroupRuleRegex.FindAllStringSubmatch(deviceCgroupRule, -1) 120 if len(ss[0]) != 5 { 121 return fmt.Errorf("invalid device cgroup rule format: '%s'", deviceCgroupRule) 122 } 123 matches := ss[0] 124 125 dPermissions := specs.LinuxDeviceCgroup{ 126 Allow: true, 127 Type: matches[1], 128 Access: matches[4], 129 } 130 if matches[2] == "*" { 131 major := int64(-1) 132 dPermissions.Major = &major 133 } else { 134 major, err := strconv.ParseInt(matches[2], 10, 64) 135 if err != nil { 136 return fmt.Errorf("invalid major value in device cgroup rule format: '%s'", deviceCgroupRule) 137 } 138 dPermissions.Major = &major 139 } 140 if matches[3] == "*" { 141 minor := int64(-1) 142 dPermissions.Minor = &minor 143 } else { 144 minor, err := strconv.ParseInt(matches[3], 10, 64) 145 if err != nil { 146 return fmt.Errorf("invalid minor value in device cgroup rule format: '%s'", deviceCgroupRule) 147 } 148 dPermissions.Minor = &minor 149 } 150 devPermissions = append(devPermissions, dPermissions) 151 } 152 } 153 154 s.Linux.Devices = append(s.Linux.Devices, devs...) 155 s.Linux.Resources.Devices = devPermissions 156 return nil 157 } 158 159 func setRlimits(daemon *Daemon, s *specs.Spec, c *container.Container) error { 160 var rlimits []specs.LinuxRlimit 161 162 // We want to leave the original HostConfig alone so make a copy here 163 hostConfig := *c.HostConfig 164 // Merge with the daemon defaults 165 daemon.mergeUlimits(&hostConfig) 166 for _, ul := range hostConfig.Ulimits { 167 rlimits = append(rlimits, specs.LinuxRlimit{ 168 Type: "RLIMIT_" + strings.ToUpper(ul.Name), 169 Soft: uint64(ul.Soft), 170 Hard: uint64(ul.Hard), 171 }) 172 } 173 174 s.Process.Rlimits = rlimits 175 return nil 176 } 177 178 func setUser(s *specs.Spec, c *container.Container) error { 179 uid, gid, additionalGids, err := getUser(c, c.Config.User) 180 if err != nil { 181 return err 182 } 183 s.Process.User.UID = uid 184 s.Process.User.GID = gid 185 s.Process.User.AdditionalGids = additionalGids 186 return nil 187 } 188 189 func readUserFile(c *container.Container, p string) (io.ReadCloser, error) { 190 fp, err := symlink.FollowSymlinkInScope(filepath.Join(c.BaseFS, p), c.BaseFS) 191 if err != nil { 192 return nil, err 193 } 194 return os.Open(fp) 195 } 196 197 func getUser(c *container.Container, username string) (uint32, uint32, []uint32, error) { 198 passwdPath, err := user.GetPasswdPath() 199 if err != nil { 200 return 0, 0, nil, err 201 } 202 groupPath, err := user.GetGroupPath() 203 if err != nil { 204 return 0, 0, nil, err 205 } 206 passwdFile, err := readUserFile(c, passwdPath) 207 if err == nil { 208 defer passwdFile.Close() 209 } 210 groupFile, err := readUserFile(c, groupPath) 211 if err == nil { 212 defer groupFile.Close() 213 } 214 215 execUser, err := user.GetExecUser(username, nil, passwdFile, groupFile) 216 if err != nil { 217 return 0, 0, nil, err 218 } 219 220 // todo: fix this double read by a change to libcontainer/user pkg 221 groupFile, err = readUserFile(c, groupPath) 222 if err == nil { 223 defer groupFile.Close() 224 } 225 var addGroups []int 226 if len(c.HostConfig.GroupAdd) > 0 { 227 addGroups, err = user.GetAdditionalGroups(c.HostConfig.GroupAdd, groupFile) 228 if err != nil { 229 return 0, 0, nil, err 230 } 231 } 232 uid := uint32(execUser.Uid) 233 gid := uint32(execUser.Gid) 234 sgids := append(execUser.Sgids, addGroups...) 235 var additionalGids []uint32 236 for _, g := range sgids { 237 additionalGids = append(additionalGids, uint32(g)) 238 } 239 return uid, gid, additionalGids, nil 240 } 241 242 func setNamespace(s *specs.Spec, ns specs.LinuxNamespace) { 243 for i, n := range s.Linux.Namespaces { 244 if n.Type == ns.Type { 245 s.Linux.Namespaces[i] = ns 246 return 247 } 248 } 249 s.Linux.Namespaces = append(s.Linux.Namespaces, ns) 250 } 251 252 func setCapabilities(s *specs.Spec, c *container.Container) error { 253 var caplist []string 254 var err error 255 if c.HostConfig.Privileged { 256 caplist = caps.GetAllCapabilities() 257 } else { 258 caplist, err = caps.TweakCapabilities(s.Process.Capabilities.Effective, c.HostConfig.CapAdd, c.HostConfig.CapDrop) 259 if err != nil { 260 return err 261 } 262 } 263 s.Process.Capabilities.Effective = caplist 264 s.Process.Capabilities.Bounding = caplist 265 s.Process.Capabilities.Permitted = caplist 266 s.Process.Capabilities.Inheritable = caplist 267 return nil 268 } 269 270 func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error { 271 userNS := false 272 // user 273 if c.HostConfig.UsernsMode.IsPrivate() { 274 uidMap := daemon.idMappings.UIDs() 275 if uidMap != nil { 276 userNS = true 277 ns := specs.LinuxNamespace{Type: "user"} 278 setNamespace(s, ns) 279 s.Linux.UIDMappings = specMapping(uidMap) 280 s.Linux.GIDMappings = specMapping(daemon.idMappings.GIDs()) 281 } 282 } 283 // network 284 if !c.Config.NetworkDisabled { 285 ns := specs.LinuxNamespace{Type: "network"} 286 parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2) 287 if parts[0] == "container" { 288 nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer()) 289 if err != nil { 290 return err 291 } 292 ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID()) 293 if userNS { 294 // to share a net namespace, they must also share a user namespace 295 nsUser := specs.LinuxNamespace{Type: "user"} 296 nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID()) 297 setNamespace(s, nsUser) 298 } 299 } else if c.HostConfig.NetworkMode.IsHost() { 300 ns.Path = c.NetworkSettings.SandboxKey 301 } 302 setNamespace(s, ns) 303 } 304 // ipc 305 if c.HostConfig.IpcMode.IsContainer() { 306 ns := specs.LinuxNamespace{Type: "ipc"} 307 ic, err := daemon.getIpcContainer(c) 308 if err != nil { 309 return err 310 } 311 ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID()) 312 setNamespace(s, ns) 313 if userNS { 314 // to share an IPC namespace, they must also share a user namespace 315 nsUser := specs.LinuxNamespace{Type: "user"} 316 nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID()) 317 setNamespace(s, nsUser) 318 } 319 } else if c.HostConfig.IpcMode.IsHost() { 320 oci.RemoveNamespace(s, specs.LinuxNamespaceType("ipc")) 321 } else { 322 ns := specs.LinuxNamespace{Type: "ipc"} 323 setNamespace(s, ns) 324 } 325 // pid 326 if c.HostConfig.PidMode.IsContainer() { 327 ns := specs.LinuxNamespace{Type: "pid"} 328 pc, err := daemon.getPidContainer(c) 329 if err != nil { 330 return err 331 } 332 ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID()) 333 setNamespace(s, ns) 334 if userNS { 335 // to share a PID namespace, they must also share a user namespace 336 nsUser := specs.LinuxNamespace{Type: "user"} 337 nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID()) 338 setNamespace(s, nsUser) 339 } 340 } else if c.HostConfig.PidMode.IsHost() { 341 oci.RemoveNamespace(s, specs.LinuxNamespaceType("pid")) 342 } else { 343 ns := specs.LinuxNamespace{Type: "pid"} 344 setNamespace(s, ns) 345 } 346 // uts 347 if c.HostConfig.UTSMode.IsHost() { 348 oci.RemoveNamespace(s, specs.LinuxNamespaceType("uts")) 349 s.Hostname = "" 350 } 351 352 return nil 353 } 354 355 func specMapping(s []idtools.IDMap) []specs.LinuxIDMapping { 356 var ids []specs.LinuxIDMapping 357 for _, item := range s { 358 ids = append(ids, specs.LinuxIDMapping{ 359 HostID: uint32(item.HostID), 360 ContainerID: uint32(item.ContainerID), 361 Size: uint32(item.Size), 362 }) 363 } 364 return ids 365 } 366 367 func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { 368 for _, m := range mountinfo { 369 if m.Mountpoint == dir { 370 return m 371 } 372 } 373 return nil 374 } 375 376 // Get the source mount point of directory passed in as argument. Also return 377 // optional fields. 378 func getSourceMount(source string) (string, string, error) { 379 // Ensure any symlinks are resolved. 380 sourcePath, err := filepath.EvalSymlinks(source) 381 if err != nil { 382 return "", "", err 383 } 384 385 mountinfos, err := mount.GetMounts() 386 if err != nil { 387 return "", "", err 388 } 389 390 mountinfo := getMountInfo(mountinfos, sourcePath) 391 if mountinfo != nil { 392 return sourcePath, mountinfo.Optional, nil 393 } 394 395 path := sourcePath 396 for { 397 path = filepath.Dir(path) 398 399 mountinfo = getMountInfo(mountinfos, path) 400 if mountinfo != nil { 401 return path, mountinfo.Optional, nil 402 } 403 404 if path == "/" { 405 break 406 } 407 } 408 409 // If we are here, we did not find parent mount. Something is wrong. 410 return "", "", fmt.Errorf("Could not find source mount of %s", source) 411 } 412 413 // Ensure mount point on which path is mounted, is shared. 414 func ensureShared(path string) error { 415 sharedMount := false 416 417 sourceMount, optionalOpts, err := getSourceMount(path) 418 if err != nil { 419 return err 420 } 421 // Make sure source mount point is shared. 422 optsSplit := strings.Split(optionalOpts, " ") 423 for _, opt := range optsSplit { 424 if strings.HasPrefix(opt, "shared:") { 425 sharedMount = true 426 break 427 } 428 } 429 430 if !sharedMount { 431 return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount) 432 } 433 return nil 434 } 435 436 // Ensure mount point on which path is mounted, is either shared or slave. 437 func ensureSharedOrSlave(path string) error { 438 sharedMount := false 439 slaveMount := false 440 441 sourceMount, optionalOpts, err := getSourceMount(path) 442 if err != nil { 443 return err 444 } 445 // Make sure source mount point is shared. 446 optsSplit := strings.Split(optionalOpts, " ") 447 for _, opt := range optsSplit { 448 if strings.HasPrefix(opt, "shared:") { 449 sharedMount = true 450 break 451 } else if strings.HasPrefix(opt, "master:") { 452 slaveMount = true 453 break 454 } 455 } 456 457 if !sharedMount && !slaveMount { 458 return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount) 459 } 460 return nil 461 } 462 463 var ( 464 mountPropagationMap = map[string]int{ 465 "private": mount.PRIVATE, 466 "rprivate": mount.RPRIVATE, 467 "shared": mount.SHARED, 468 "rshared": mount.RSHARED, 469 "slave": mount.SLAVE, 470 "rslave": mount.RSLAVE, 471 } 472 473 mountPropagationReverseMap = map[int]string{ 474 mount.PRIVATE: "private", 475 mount.RPRIVATE: "rprivate", 476 mount.SHARED: "shared", 477 mount.RSHARED: "rshared", 478 mount.SLAVE: "slave", 479 mount.RSLAVE: "rslave", 480 } 481 ) 482 483 func setMounts(daemon *Daemon, s *specs.Spec, c *container.Container, mounts []container.Mount) error { 484 userMounts := make(map[string]struct{}) 485 for _, m := range mounts { 486 userMounts[m.Destination] = struct{}{} 487 } 488 489 // Filter out mounts that are overridden by user supplied mounts 490 var defaultMounts []specs.Mount 491 _, mountDev := userMounts["/dev"] 492 for _, m := range s.Mounts { 493 if _, ok := userMounts[m.Destination]; !ok { 494 if mountDev && strings.HasPrefix(m.Destination, "/dev/") { 495 continue 496 } 497 defaultMounts = append(defaultMounts, m) 498 } 499 } 500 501 s.Mounts = defaultMounts 502 for _, m := range mounts { 503 for _, cm := range s.Mounts { 504 if cm.Destination == m.Destination { 505 return fmt.Errorf("Duplicate mount point '%s'", m.Destination) 506 } 507 } 508 509 if m.Source == "tmpfs" { 510 data := m.Data 511 options := []string{"noexec", "nosuid", "nodev", string(volume.DefaultPropagationMode)} 512 if data != "" { 513 options = append(options, strings.Split(data, ",")...) 514 } 515 516 merged, err := mount.MergeTmpfsOptions(options) 517 if err != nil { 518 return err 519 } 520 521 s.Mounts = append(s.Mounts, specs.Mount{Destination: m.Destination, Source: m.Source, Type: "tmpfs", Options: merged}) 522 continue 523 } 524 525 mt := specs.Mount{Destination: m.Destination, Source: m.Source, Type: "bind"} 526 527 // Determine property of RootPropagation based on volume 528 // properties. If a volume is shared, then keep root propagation 529 // shared. This should work for slave and private volumes too. 530 // 531 // For slave volumes, it can be either [r]shared/[r]slave. 532 // 533 // For private volumes any root propagation value should work. 534 pFlag := mountPropagationMap[m.Propagation] 535 if pFlag == mount.SHARED || pFlag == mount.RSHARED { 536 if err := ensureShared(m.Source); err != nil { 537 return err 538 } 539 rootpg := mountPropagationMap[s.Linux.RootfsPropagation] 540 if rootpg != mount.SHARED && rootpg != mount.RSHARED { 541 s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.SHARED] 542 } 543 } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE { 544 if err := ensureSharedOrSlave(m.Source); err != nil { 545 return err 546 } 547 rootpg := mountPropagationMap[s.Linux.RootfsPropagation] 548 if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { 549 s.Linux.RootfsPropagation = mountPropagationReverseMap[mount.RSLAVE] 550 } 551 } 552 553 opts := []string{"rbind"} 554 if !m.Writable { 555 opts = append(opts, "ro") 556 } 557 if pFlag != 0 { 558 opts = append(opts, mountPropagationReverseMap[pFlag]) 559 } 560 561 mt.Options = opts 562 s.Mounts = append(s.Mounts, mt) 563 } 564 565 if s.Root.Readonly { 566 for i, m := range s.Mounts { 567 switch m.Destination { 568 case "/proc", "/dev/pts", "/dev/mqueue": // /dev is remounted by runc 569 continue 570 } 571 if _, ok := userMounts[m.Destination]; !ok { 572 if !stringutils.InSlice(m.Options, "ro") { 573 s.Mounts[i].Options = append(s.Mounts[i].Options, "ro") 574 } 575 } 576 } 577 } 578 579 if c.HostConfig.Privileged { 580 if !s.Root.Readonly { 581 // clear readonly for /sys 582 for i := range s.Mounts { 583 if s.Mounts[i].Destination == "/sys" { 584 clearReadOnly(&s.Mounts[i]) 585 } 586 } 587 } 588 s.Linux.ReadonlyPaths = nil 589 s.Linux.MaskedPaths = nil 590 } 591 592 // TODO: until a kernel/mount solution exists for handling remount in a user namespace, 593 // we must clear the readonly flag for the cgroups mount (@mrunalp concurs) 594 if uidMap := daemon.idMappings.UIDs(); uidMap != nil || c.HostConfig.Privileged { 595 for i, m := range s.Mounts { 596 if m.Type == "cgroup" { 597 clearReadOnly(&s.Mounts[i]) 598 } 599 } 600 } 601 602 return nil 603 } 604 605 func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error { 606 linkedEnv, err := daemon.setupLinkedContainers(c) 607 if err != nil { 608 return err 609 } 610 s.Root = specs.Root{ 611 Path: c.BaseFS, 612 Readonly: c.HostConfig.ReadonlyRootfs, 613 } 614 if err := c.SetupWorkingDirectory(daemon.idMappings.RootPair()); err != nil { 615 return err 616 } 617 cwd := c.Config.WorkingDir 618 if len(cwd) == 0 { 619 cwd = "/" 620 } 621 s.Process.Args = append([]string{c.Path}, c.Args...) 622 623 // only add the custom init if it is specified and the container is running in its 624 // own private pid namespace. It does not make sense to add if it is running in the 625 // host namespace or another container's pid namespace where we already have an init 626 if c.HostConfig.PidMode.IsPrivate() { 627 if (c.HostConfig.Init != nil && *c.HostConfig.Init) || 628 (c.HostConfig.Init == nil && daemon.configStore.Init) { 629 s.Process.Args = append([]string{"/dev/init", "--", c.Path}, c.Args...) 630 var path string 631 if daemon.configStore.InitPath == "" { 632 path, err = exec.LookPath(daemonconfig.DefaultInitBinary) 633 if err != nil { 634 return err 635 } 636 } 637 if daemon.configStore.InitPath != "" { 638 path = daemon.configStore.InitPath 639 } 640 s.Mounts = append(s.Mounts, specs.Mount{ 641 Destination: "/dev/init", 642 Type: "bind", 643 Source: path, 644 Options: []string{"bind", "ro"}, 645 }) 646 } 647 } 648 s.Process.Cwd = cwd 649 s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv) 650 s.Process.Terminal = c.Config.Tty 651 s.Hostname = c.FullHostname() 652 653 return nil 654 } 655 656 func (daemon *Daemon) createSpec(c *container.Container) (*specs.Spec, error) { 657 s := oci.DefaultSpec() 658 if err := daemon.populateCommonSpec(&s, c); err != nil { 659 return nil, err 660 } 661 662 var cgroupsPath string 663 scopePrefix := "docker" 664 parent := "/docker" 665 useSystemd := UsingSystemd(daemon.configStore) 666 if useSystemd { 667 parent = "system.slice" 668 } 669 670 if c.HostConfig.CgroupParent != "" { 671 parent = c.HostConfig.CgroupParent 672 } else if daemon.configStore.CgroupParent != "" { 673 parent = daemon.configStore.CgroupParent 674 } 675 676 if useSystemd { 677 cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID 678 logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath) 679 } else { 680 cgroupsPath = filepath.Join(parent, c.ID) 681 } 682 s.Linux.CgroupsPath = cgroupsPath 683 684 if err := setResources(&s, c.HostConfig.Resources); err != nil { 685 return nil, fmt.Errorf("linux runtime spec resources: %v", err) 686 } 687 s.Linux.Resources.OOMScoreAdj = &c.HostConfig.OomScoreAdj 688 s.Linux.Sysctl = c.HostConfig.Sysctls 689 690 p := s.Linux.CgroupsPath 691 if useSystemd { 692 initPath, err := cgroups.GetInitCgroup("cpu") 693 if err != nil { 694 return nil, err 695 } 696 p, _ = cgroups.GetOwnCgroup("cpu") 697 if err != nil { 698 return nil, err 699 } 700 p = filepath.Join(initPath, p) 701 } 702 703 // Clean path to guard against things like ../../../BAD 704 parentPath := filepath.Dir(p) 705 if !filepath.IsAbs(parentPath) { 706 parentPath = filepath.Clean("/" + parentPath) 707 } 708 709 if err := daemon.initCgroupsPath(parentPath); err != nil { 710 return nil, fmt.Errorf("linux init cgroups path: %v", err) 711 } 712 if err := setDevices(&s, c); err != nil { 713 return nil, fmt.Errorf("linux runtime spec devices: %v", err) 714 } 715 if err := setRlimits(daemon, &s, c); err != nil { 716 return nil, fmt.Errorf("linux runtime spec rlimits: %v", err) 717 } 718 if err := setUser(&s, c); err != nil { 719 return nil, fmt.Errorf("linux spec user: %v", err) 720 } 721 if err := setNamespaces(daemon, &s, c); err != nil { 722 return nil, fmt.Errorf("linux spec namespaces: %v", err) 723 } 724 if err := setCapabilities(&s, c); err != nil { 725 return nil, fmt.Errorf("linux spec capabilities: %v", err) 726 } 727 if err := setSeccomp(daemon, &s, c); err != nil { 728 return nil, fmt.Errorf("linux seccomp: %v", err) 729 } 730 731 if err := daemon.setupIpcDirs(c); err != nil { 732 return nil, err 733 } 734 735 if err := daemon.setupSecretDir(c); err != nil { 736 return nil, err 737 } 738 739 if err := daemon.setupConfigDir(c); err != nil { 740 return nil, err 741 } 742 743 ms, err := daemon.setupMounts(c) 744 if err != nil { 745 return nil, err 746 } 747 748 ms = append(ms, c.IpcMounts()...) 749 750 tmpfsMounts, err := c.TmpfsMounts() 751 if err != nil { 752 return nil, err 753 } 754 ms = append(ms, tmpfsMounts...) 755 756 if m := c.SecretMounts(); m != nil { 757 ms = append(ms, m...) 758 } 759 760 ms = append(ms, c.ConfigMounts()...) 761 762 sort.Sort(mounts(ms)) 763 if err := setMounts(daemon, &s, c, ms); err != nil { 764 return nil, fmt.Errorf("linux mounts: %v", err) 765 } 766 767 for _, ns := range s.Linux.Namespaces { 768 if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled { 769 target, err := os.Readlink(filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")) 770 if err != nil { 771 return nil, err 772 } 773 774 s.Hooks = &specs.Hooks{ 775 Prestart: []specs.Hook{{ 776 Path: target, // FIXME: cross-platform 777 Args: []string{"libnetwork-setkey", c.ID, daemon.netController.ID()}, 778 }}, 779 } 780 } 781 } 782 783 if apparmor.IsEnabled() { 784 var appArmorProfile string 785 if c.AppArmorProfile != "" { 786 appArmorProfile = c.AppArmorProfile 787 } else if c.HostConfig.Privileged { 788 appArmorProfile = "unconfined" 789 } else { 790 appArmorProfile = "docker-default" 791 } 792 793 if appArmorProfile == "docker-default" { 794 // Unattended upgrades and other fun services can unload AppArmor 795 // profiles inadvertently. Since we cannot store our profile in 796 // /etc/apparmor.d, nor can we practically add other ways of 797 // telling the system to keep our profile loaded, in order to make 798 // sure that we keep the default profile enabled we dynamically 799 // reload it if necessary. 800 if err := ensureDefaultAppArmorProfile(); err != nil { 801 return nil, err 802 } 803 } 804 805 s.Process.ApparmorProfile = appArmorProfile 806 } 807 s.Process.SelinuxLabel = c.GetProcessLabel() 808 s.Process.NoNewPrivileges = c.NoNewPrivileges 809 s.Linux.MountLabel = c.MountLabel 810 811 return (*specs.Spec)(&s), nil 812 } 813 814 func clearReadOnly(m *specs.Mount) { 815 var opt []string 816 for _, o := range m.Options { 817 if o != "ro" { 818 opt = append(opt, o) 819 } 820 } 821 m.Options = opt 822 } 823 824 // mergeUlimits merge the Ulimits from HostConfig with daemon defaults, and update HostConfig 825 func (daemon *Daemon) mergeUlimits(c *containertypes.HostConfig) { 826 ulimits := c.Ulimits 827 // Merge ulimits with daemon defaults 828 ulIdx := make(map[string]struct{}) 829 for _, ul := range ulimits { 830 ulIdx[ul.Name] = struct{}{} 831 } 832 for name, ul := range daemon.configStore.Ulimits { 833 if _, exists := ulIdx[name]; !exists { 834 ulimits = append(ulimits, ul) 835 } 836 } 837 c.Ulimits = ulimits 838 }