github.com/endocode/docker@v1.4.2-0.20160113120958-46eb4700391e/daemon/execdriver/native/create.go (about) 1 // +build linux,cgo 2 3 package native 4 5 import ( 6 "fmt" 7 "path/filepath" 8 "strings" 9 "syscall" 10 11 "github.com/docker/docker/daemon/execdriver" 12 derr "github.com/docker/docker/errors" 13 "github.com/docker/docker/pkg/mount" 14 15 "github.com/docker/docker/volume" 16 "github.com/opencontainers/runc/libcontainer/apparmor" 17 "github.com/opencontainers/runc/libcontainer/configs" 18 "github.com/opencontainers/runc/libcontainer/devices" 19 ) 20 21 // createContainer populates and configures the container type with the 22 // data provided by the execdriver.Command 23 func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) { 24 container = execdriver.InitContainer(c) 25 26 if err := d.createIpc(container, c); err != nil { 27 return nil, err 28 } 29 30 if err := d.createPid(container, c); err != nil { 31 return nil, err 32 } 33 34 if err := d.createUTS(container, c); err != nil { 35 return nil, err 36 } 37 38 if err := d.setupRemappedRoot(container, c); err != nil { 39 return nil, err 40 } 41 42 if err := d.createNetwork(container, c, hooks); err != nil { 43 return nil, err 44 } 45 46 if c.ProcessConfig.Privileged { 47 if !container.Readonlyfs { 48 // clear readonly for /sys 49 for i := range container.Mounts { 50 if container.Mounts[i].Destination == "/sys" { 51 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 52 } 53 } 54 container.ReadonlyPaths = nil 55 } 56 57 // clear readonly for cgroup 58 for i := range container.Mounts { 59 if container.Mounts[i].Device == "cgroup" { 60 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 61 } 62 } 63 64 container.MaskPaths = nil 65 if err := d.setPrivileged(container); err != nil { 66 return nil, err 67 } 68 } else { 69 if err := d.setCapabilities(container, c); err != nil { 70 return nil, err 71 } 72 73 if c.SeccompProfile == "" { 74 container.Seccomp = getDefaultSeccompProfile() 75 } 76 } 77 // add CAP_ prefix to all caps for new libcontainer update to match 78 // the spec format. 79 for i, s := range container.Capabilities { 80 if !strings.HasPrefix(s, "CAP_") { 81 container.Capabilities[i] = fmt.Sprintf("CAP_%s", s) 82 } 83 } 84 container.AdditionalGroups = c.GroupAdd 85 86 if c.AppArmorProfile != "" { 87 container.AppArmorProfile = c.AppArmorProfile 88 } 89 90 if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" { 91 container.Seccomp, err = loadSeccompProfile(c.SeccompProfile) 92 if err != nil { 93 return nil, err 94 } 95 } 96 97 if err := execdriver.SetupCgroups(container, c); err != nil { 98 return nil, err 99 } 100 101 container.OomScoreAdj = c.OomScoreAdj 102 103 if container.Readonlyfs { 104 for i := range container.Mounts { 105 switch container.Mounts[i].Destination { 106 case "/proc", "/dev", "/dev/pts": 107 continue 108 } 109 container.Mounts[i].Flags |= syscall.MS_RDONLY 110 } 111 112 /* These paths must be remounted as r/o */ 113 container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev") 114 } 115 116 if err := d.setupMounts(container, c); err != nil { 117 return nil, err 118 } 119 120 d.setupLabels(container, c) 121 d.setupRlimits(container, c) 122 return container, nil 123 } 124 125 func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error { 126 if c.Network == nil { 127 return nil 128 } 129 if c.Network.ContainerID != "" { 130 d.Lock() 131 active := d.activeContainers[c.Network.ContainerID] 132 d.Unlock() 133 134 if active == nil { 135 return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID) 136 } 137 138 state, err := active.State() 139 if err != nil { 140 return err 141 } 142 143 container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET]) 144 return nil 145 } 146 147 if c.Network.NamespacePath != "" { 148 container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath) 149 return nil 150 } 151 // only set up prestart hook if the namespace path is not set (this should be 152 // all cases *except* for --net=host shared networking) 153 container.Hooks = &configs.Hooks{ 154 Prestart: []configs.Hook{ 155 configs.NewFunctionHook(func(s configs.HookState) error { 156 if len(hooks.PreStart) > 0 { 157 for _, fnHook := range hooks.PreStart { 158 // A closed channel for OOM is returned here as it will be 159 // non-blocking and return the correct result when read. 160 chOOM := make(chan struct{}) 161 close(chOOM) 162 if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil { 163 return err 164 } 165 } 166 } 167 return nil 168 }), 169 }, 170 } 171 return nil 172 } 173 174 func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error { 175 if c.Ipc.HostIpc { 176 container.Namespaces.Remove(configs.NEWIPC) 177 return nil 178 } 179 180 if c.Ipc.ContainerID != "" { 181 d.Lock() 182 active := d.activeContainers[c.Ipc.ContainerID] 183 d.Unlock() 184 185 if active == nil { 186 return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) 187 } 188 189 state, err := active.State() 190 if err != nil { 191 return err 192 } 193 container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC]) 194 } 195 196 return nil 197 } 198 199 func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error { 200 if c.Pid.HostPid { 201 container.Namespaces.Remove(configs.NEWPID) 202 return nil 203 } 204 205 return nil 206 } 207 208 func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error { 209 if c.UTS.HostUTS { 210 container.Namespaces.Remove(configs.NEWUTS) 211 container.Hostname = "" 212 return nil 213 } 214 215 return nil 216 } 217 218 func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error { 219 if c.RemappedRoot.UID == 0 { 220 container.Namespaces.Remove(configs.NEWUSER) 221 return nil 222 } 223 224 // convert the Docker daemon id map to the libcontainer variant of the same struct 225 // this keeps us from having to import libcontainer code across Docker client + daemon packages 226 cuidMaps := []configs.IDMap{} 227 cgidMaps := []configs.IDMap{} 228 for _, idMap := range c.UIDMapping { 229 cuidMaps = append(cuidMaps, configs.IDMap(idMap)) 230 } 231 for _, idMap := range c.GIDMapping { 232 cgidMaps = append(cgidMaps, configs.IDMap(idMap)) 233 } 234 container.UidMappings = cuidMaps 235 container.GidMappings = cgidMaps 236 237 for _, node := range container.Devices { 238 node.Uid = uint32(c.RemappedRoot.UID) 239 node.Gid = uint32(c.RemappedRoot.GID) 240 } 241 // TODO: until a kernel/mount solution exists for handling remount in a user namespace, 242 // we must clear the readonly flag for the cgroups mount (@mrunalp concurs) 243 for i := range container.Mounts { 244 if container.Mounts[i].Device == "cgroup" { 245 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 246 } 247 } 248 249 return nil 250 } 251 252 func (d *Driver) setPrivileged(container *configs.Config) (err error) { 253 container.Capabilities = execdriver.GetAllCapabilities() 254 container.Cgroups.Resources.AllowAllDevices = true 255 256 hostDevices, err := devices.HostDevices() 257 if err != nil { 258 return err 259 } 260 container.Devices = hostDevices 261 262 if apparmor.IsEnabled() { 263 container.AppArmorProfile = "unconfined" 264 } 265 return nil 266 } 267 268 func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) { 269 container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) 270 return err 271 } 272 273 func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) { 274 if c.Resources == nil { 275 return 276 } 277 278 for _, rlimit := range c.Resources.Rlimits { 279 container.Rlimits = append(container.Rlimits, configs.Rlimit{ 280 Type: rlimit.Type, 281 Hard: rlimit.Hard, 282 Soft: rlimit.Soft, 283 }) 284 } 285 } 286 287 // If rootfs mount propagation is RPRIVATE, that means all the volumes are 288 // going to be private anyway. There is no need to apply per volume 289 // propagation on top. This is just an optimzation so that cost of per volume 290 // propagation is paid only if user decides to make some volume non-private 291 // which will force rootfs mount propagation to be non RPRIVATE. 292 func checkResetVolumePropagation(container *configs.Config) { 293 if container.RootPropagation != mount.RPRIVATE { 294 return 295 } 296 for _, m := range container.Mounts { 297 m.PropagationFlags = nil 298 } 299 } 300 301 func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { 302 for _, m := range mountinfo { 303 if m.Mountpoint == dir { 304 return m 305 } 306 } 307 return nil 308 } 309 310 // Get the source mount point of directory passed in as argument. Also return 311 // optional fields. 312 func getSourceMount(source string) (string, string, error) { 313 // Ensure any symlinks are resolved. 314 sourcePath, err := filepath.EvalSymlinks(source) 315 if err != nil { 316 return "", "", err 317 } 318 319 mountinfos, err := mount.GetMounts() 320 if err != nil { 321 return "", "", err 322 } 323 324 mountinfo := getMountInfo(mountinfos, sourcePath) 325 if mountinfo != nil { 326 return sourcePath, mountinfo.Optional, nil 327 } 328 329 path := sourcePath 330 for { 331 path = filepath.Dir(path) 332 333 mountinfo = getMountInfo(mountinfos, path) 334 if mountinfo != nil { 335 return path, mountinfo.Optional, nil 336 } 337 338 if path == "/" { 339 break 340 } 341 } 342 343 // If we are here, we did not find parent mount. Something is wrong. 344 return "", "", fmt.Errorf("Could not find source mount of %s", source) 345 } 346 347 // Ensure mount point on which path is mouted, is shared. 348 func ensureShared(path string) error { 349 sharedMount := false 350 351 sourceMount, optionalOpts, err := getSourceMount(path) 352 if err != nil { 353 return err 354 } 355 // Make sure source mount point is shared. 356 optsSplit := strings.Split(optionalOpts, " ") 357 for _, opt := range optsSplit { 358 if strings.HasPrefix(opt, "shared:") { 359 sharedMount = true 360 break 361 } 362 } 363 364 if !sharedMount { 365 return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount) 366 } 367 return nil 368 } 369 370 // Ensure mount point on which path is mounted, is either shared or slave. 371 func ensureSharedOrSlave(path string) error { 372 sharedMount := false 373 slaveMount := false 374 375 sourceMount, optionalOpts, err := getSourceMount(path) 376 if err != nil { 377 return err 378 } 379 // Make sure source mount point is shared. 380 optsSplit := strings.Split(optionalOpts, " ") 381 for _, opt := range optsSplit { 382 if strings.HasPrefix(opt, "shared:") { 383 sharedMount = true 384 break 385 } else if strings.HasPrefix(opt, "master:") { 386 slaveMount = true 387 break 388 } 389 } 390 391 if !sharedMount && !slaveMount { 392 return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount) 393 } 394 return nil 395 } 396 397 func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error { 398 userMounts := make(map[string]struct{}) 399 for _, m := range c.Mounts { 400 userMounts[m.Destination] = struct{}{} 401 } 402 403 // Filter out mounts that are overridden by user supplied mounts 404 var defaultMounts []*configs.Mount 405 _, mountDev := userMounts["/dev"] 406 for _, m := range container.Mounts { 407 if _, ok := userMounts[m.Destination]; !ok { 408 if mountDev && strings.HasPrefix(m.Destination, "/dev/") { 409 container.Devices = nil 410 continue 411 } 412 defaultMounts = append(defaultMounts, m) 413 } 414 } 415 container.Mounts = defaultMounts 416 417 mountPropagationMap := map[string]int{ 418 "private": mount.PRIVATE, 419 "rprivate": mount.RPRIVATE, 420 "shared": mount.SHARED, 421 "rshared": mount.RSHARED, 422 "slave": mount.SLAVE, 423 "rslave": mount.RSLAVE, 424 } 425 426 for _, m := range c.Mounts { 427 for _, cm := range container.Mounts { 428 if cm.Destination == m.Destination { 429 return derr.ErrorCodeMountDup.WithArgs(m.Destination) 430 } 431 } 432 433 if m.Source == "tmpfs" { 434 var ( 435 data = "size=65536k" 436 flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV 437 err error 438 ) 439 fulldest := filepath.Join(c.Rootfs, m.Destination) 440 if m.Data != "" { 441 flags, data, err = mount.ParseTmpfsOptions(m.Data) 442 if err != nil { 443 return err 444 } 445 } 446 container.Mounts = append(container.Mounts, &configs.Mount{ 447 Source: m.Source, 448 Destination: m.Destination, 449 Data: data, 450 Device: "tmpfs", 451 Flags: flags, 452 PremountCmds: genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination), 453 PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination), 454 PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]}, 455 }) 456 continue 457 } 458 flags := syscall.MS_BIND | syscall.MS_REC 459 var pFlag int 460 if !m.Writable { 461 flags |= syscall.MS_RDONLY 462 } 463 464 // Determine property of RootPropagation based on volume 465 // properties. If a volume is shared, then keep root propagtion 466 // shared. This should work for slave and private volumes too. 467 // 468 // For slave volumes, it can be either [r]shared/[r]slave. 469 // 470 // For private volumes any root propagation value should work. 471 472 pFlag = mountPropagationMap[m.Propagation] 473 if pFlag == mount.SHARED || pFlag == mount.RSHARED { 474 if err := ensureShared(m.Source); err != nil { 475 return err 476 } 477 rootpg := container.RootPropagation 478 if rootpg != mount.SHARED && rootpg != mount.RSHARED { 479 execdriver.SetRootPropagation(container, mount.SHARED) 480 } 481 } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE { 482 if err := ensureSharedOrSlave(m.Source); err != nil { 483 return err 484 } 485 rootpg := container.RootPropagation 486 if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { 487 execdriver.SetRootPropagation(container, mount.RSLAVE) 488 } 489 } 490 491 mount := &configs.Mount{ 492 Source: m.Source, 493 Destination: m.Destination, 494 Device: "bind", 495 Flags: flags, 496 } 497 498 if pFlag != 0 { 499 mount.PropagationFlags = []int{pFlag} 500 } 501 502 container.Mounts = append(container.Mounts, mount) 503 } 504 505 checkResetVolumePropagation(container) 506 return nil 507 } 508 509 func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) { 510 container.ProcessLabel = c.ProcessLabel 511 container.MountLabel = c.MountLabel 512 }