github.com/walkingsparrow/docker@v1.4.2-0.20151218153551-b708a2249bfa/daemon/execdriver/native/create.go (about) 1 // +build linux,cgo 2 3 package native 4 5 import ( 6 "fmt" 7 "path/filepath" 8 "strings" 9 "syscall" 10 11 "github.com/docker/docker/daemon/execdriver" 12 derr "github.com/docker/docker/errors" 13 "github.com/docker/docker/pkg/mount" 14 15 "github.com/docker/docker/volume" 16 "github.com/opencontainers/runc/libcontainer/apparmor" 17 "github.com/opencontainers/runc/libcontainer/configs" 18 "github.com/opencontainers/runc/libcontainer/devices" 19 ) 20 21 // createContainer populates and configures the container type with the 22 // data provided by the execdriver.Command 23 func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) { 24 container = execdriver.InitContainer(c) 25 26 if err := d.createIpc(container, c); err != nil { 27 return nil, err 28 } 29 30 if err := d.createPid(container, c); err != nil { 31 return nil, err 32 } 33 34 if err := d.createUTS(container, c); err != nil { 35 return nil, err 36 } 37 38 if err := d.setupRemappedRoot(container, c); err != nil { 39 return nil, err 40 } 41 42 if err := d.createNetwork(container, c, hooks); err != nil { 43 return nil, err 44 } 45 46 if c.ProcessConfig.Privileged { 47 if !container.Readonlyfs { 48 // clear readonly for /sys 49 for i := range container.Mounts { 50 if container.Mounts[i].Destination == "/sys" { 51 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 52 } 53 } 54 container.ReadonlyPaths = nil 55 } 56 57 // clear readonly for cgroup 58 for i := range container.Mounts { 59 if container.Mounts[i].Device == "cgroup" { 60 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 61 } 62 } 63 64 container.MaskPaths = nil 65 if err := d.setPrivileged(container); err != nil { 66 return nil, err 67 } 68 } else { 69 if err := d.setCapabilities(container, c); err != nil { 70 return nil, err 71 } 72 } 73 // add CAP_ prefix to all caps for new libcontainer update to match 74 // the spec format. 75 for i, s := range container.Capabilities { 76 if !strings.HasPrefix(s, "CAP_") { 77 container.Capabilities[i] = fmt.Sprintf("CAP_%s", s) 78 } 79 } 80 container.AdditionalGroups = c.GroupAdd 81 82 if c.AppArmorProfile != "" { 83 container.AppArmorProfile = c.AppArmorProfile 84 } 85 86 if c.SeccompProfile != "" { 87 container.Seccomp, err = loadSeccompProfile(c.SeccompProfile) 88 if err != nil { 89 return nil, err 90 } 91 } 92 if err := execdriver.SetupCgroups(container, c); err != nil { 93 return nil, err 94 } 95 96 container.OomScoreAdj = c.OomScoreAdj 97 98 if container.Readonlyfs { 99 for i := range container.Mounts { 100 switch container.Mounts[i].Destination { 101 case "/proc", "/dev", "/dev/pts": 102 continue 103 } 104 container.Mounts[i].Flags |= syscall.MS_RDONLY 105 } 106 107 /* These paths must be remounted as r/o */ 108 container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev") 109 } 110 111 if err := d.setupMounts(container, c); err != nil { 112 return nil, err 113 } 114 115 d.setupLabels(container, c) 116 d.setupRlimits(container, c) 117 return container, nil 118 } 119 120 func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error { 121 if c.Network == nil { 122 return nil 123 } 124 if c.Network.ContainerID != "" { 125 d.Lock() 126 active := d.activeContainers[c.Network.ContainerID] 127 d.Unlock() 128 129 if active == nil { 130 return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID) 131 } 132 133 state, err := active.State() 134 if err != nil { 135 return err 136 } 137 138 container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET]) 139 return nil 140 } 141 142 if c.Network.NamespacePath != "" { 143 container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath) 144 return nil 145 } 146 // only set up prestart hook if the namespace path is not set (this should be 147 // all cases *except* for --net=host shared networking) 148 container.Hooks = &configs.Hooks{ 149 Prestart: []configs.Hook{ 150 configs.NewFunctionHook(func(s configs.HookState) error { 151 if len(hooks.PreStart) > 0 { 152 for _, fnHook := range hooks.PreStart { 153 // A closed channel for OOM is returned here as it will be 154 // non-blocking and return the correct result when read. 155 chOOM := make(chan struct{}) 156 close(chOOM) 157 if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil { 158 return err 159 } 160 } 161 } 162 return nil 163 }), 164 }, 165 } 166 return nil 167 } 168 169 func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error { 170 if c.Ipc.HostIpc { 171 container.Namespaces.Remove(configs.NEWIPC) 172 return nil 173 } 174 175 if c.Ipc.ContainerID != "" { 176 d.Lock() 177 active := d.activeContainers[c.Ipc.ContainerID] 178 d.Unlock() 179 180 if active == nil { 181 return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) 182 } 183 184 state, err := active.State() 185 if err != nil { 186 return err 187 } 188 container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC]) 189 } 190 191 return nil 192 } 193 194 func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error { 195 if c.Pid.HostPid { 196 container.Namespaces.Remove(configs.NEWPID) 197 return nil 198 } 199 200 return nil 201 } 202 203 func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error { 204 if c.UTS.HostUTS { 205 container.Namespaces.Remove(configs.NEWUTS) 206 container.Hostname = "" 207 return nil 208 } 209 210 return nil 211 } 212 213 func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error { 214 if c.RemappedRoot.UID == 0 { 215 container.Namespaces.Remove(configs.NEWUSER) 216 return nil 217 } 218 219 // convert the Docker daemon id map to the libcontainer variant of the same struct 220 // this keeps us from having to import libcontainer code across Docker client + daemon packages 221 cuidMaps := []configs.IDMap{} 222 cgidMaps := []configs.IDMap{} 223 for _, idMap := range c.UIDMapping { 224 cuidMaps = append(cuidMaps, configs.IDMap(idMap)) 225 } 226 for _, idMap := range c.GIDMapping { 227 cgidMaps = append(cgidMaps, configs.IDMap(idMap)) 228 } 229 container.UidMappings = cuidMaps 230 container.GidMappings = cgidMaps 231 232 for _, node := range container.Devices { 233 node.Uid = uint32(c.RemappedRoot.UID) 234 node.Gid = uint32(c.RemappedRoot.GID) 235 } 236 // TODO: until a kernel/mount solution exists for handling remount in a user namespace, 237 // we must clear the readonly flag for the cgroups mount (@mrunalp concurs) 238 for i := range container.Mounts { 239 if container.Mounts[i].Device == "cgroup" { 240 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 241 } 242 } 243 244 return nil 245 } 246 247 func (d *Driver) setPrivileged(container *configs.Config) (err error) { 248 container.Capabilities = execdriver.GetAllCapabilities() 249 container.Cgroups.AllowAllDevices = true 250 251 hostDevices, err := devices.HostDevices() 252 if err != nil { 253 return err 254 } 255 container.Devices = hostDevices 256 257 if apparmor.IsEnabled() { 258 container.AppArmorProfile = "unconfined" 259 } 260 return nil 261 } 262 263 func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) { 264 container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) 265 return err 266 } 267 268 func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) { 269 if c.Resources == nil { 270 return 271 } 272 273 for _, rlimit := range c.Resources.Rlimits { 274 container.Rlimits = append(container.Rlimits, configs.Rlimit{ 275 Type: rlimit.Type, 276 Hard: rlimit.Hard, 277 Soft: rlimit.Soft, 278 }) 279 } 280 } 281 282 // If rootfs mount propagation is RPRIVATE, that means all the volumes are 283 // going to be private anyway. There is no need to apply per volume 284 // propagation on top. This is just an optimzation so that cost of per volume 285 // propagation is paid only if user decides to make some volume non-private 286 // which will force rootfs mount propagation to be non RPRIVATE. 287 func checkResetVolumePropagation(container *configs.Config) { 288 if container.RootPropagation != mount.RPRIVATE { 289 return 290 } 291 for _, m := range container.Mounts { 292 m.PropagationFlags = nil 293 } 294 } 295 296 func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { 297 for _, m := range mountinfo { 298 if m.Mountpoint == dir { 299 return m 300 } 301 } 302 return nil 303 } 304 305 // Get the source mount point of directory passed in as argument. Also return 306 // optional fields. 307 func getSourceMount(source string) (string, string, error) { 308 // Ensure any symlinks are resolved. 309 sourcePath, err := filepath.EvalSymlinks(source) 310 if err != nil { 311 return "", "", err 312 } 313 314 mountinfos, err := mount.GetMounts() 315 if err != nil { 316 return "", "", err 317 } 318 319 mountinfo := getMountInfo(mountinfos, sourcePath) 320 if mountinfo != nil { 321 return sourcePath, mountinfo.Optional, nil 322 } 323 324 path := sourcePath 325 for { 326 path = filepath.Dir(path) 327 328 mountinfo = getMountInfo(mountinfos, path) 329 if mountinfo != nil { 330 return path, mountinfo.Optional, nil 331 } 332 333 if path == "/" { 334 break 335 } 336 } 337 338 // If we are here, we did not find parent mount. Something is wrong. 339 return "", "", fmt.Errorf("Could not find source mount of %s", source) 340 } 341 342 // Ensure mount point on which path is mouted, is shared. 343 func ensureShared(path string) error { 344 sharedMount := false 345 346 sourceMount, optionalOpts, err := getSourceMount(path) 347 if err != nil { 348 return err 349 } 350 // Make sure source mount point is shared. 351 optsSplit := strings.Split(optionalOpts, " ") 352 for _, opt := range optsSplit { 353 if strings.HasPrefix(opt, "shared:") { 354 sharedMount = true 355 break 356 } 357 } 358 359 if !sharedMount { 360 return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount) 361 } 362 return nil 363 } 364 365 // Ensure mount point on which path is mounted, is either shared or slave. 366 func ensureSharedOrSlave(path string) error { 367 sharedMount := false 368 slaveMount := false 369 370 sourceMount, optionalOpts, err := getSourceMount(path) 371 if err != nil { 372 return err 373 } 374 // Make sure source mount point is shared. 375 optsSplit := strings.Split(optionalOpts, " ") 376 for _, opt := range optsSplit { 377 if strings.HasPrefix(opt, "shared:") { 378 sharedMount = true 379 break 380 } else if strings.HasPrefix(opt, "master:") { 381 slaveMount = true 382 break 383 } 384 } 385 386 if !sharedMount && !slaveMount { 387 return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount) 388 } 389 return nil 390 } 391 392 func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error { 393 userMounts := make(map[string]struct{}) 394 for _, m := range c.Mounts { 395 userMounts[m.Destination] = struct{}{} 396 } 397 398 // Filter out mounts that are overridden by user supplied mounts 399 var defaultMounts []*configs.Mount 400 _, mountDev := userMounts["/dev"] 401 for _, m := range container.Mounts { 402 if _, ok := userMounts[m.Destination]; !ok { 403 if mountDev && strings.HasPrefix(m.Destination, "/dev/") { 404 container.Devices = nil 405 continue 406 } 407 defaultMounts = append(defaultMounts, m) 408 } 409 } 410 container.Mounts = defaultMounts 411 412 mountPropagationMap := map[string]int{ 413 "private": mount.PRIVATE, 414 "rprivate": mount.RPRIVATE, 415 "shared": mount.SHARED, 416 "rshared": mount.RSHARED, 417 "slave": mount.SLAVE, 418 "rslave": mount.RSLAVE, 419 } 420 421 for _, m := range c.Mounts { 422 for _, cm := range container.Mounts { 423 if cm.Destination == m.Destination { 424 return derr.ErrorCodeMountDup.WithArgs(m.Destination) 425 } 426 } 427 428 if m.Source == "tmpfs" { 429 var ( 430 data = "size=65536k" 431 flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV 432 err error 433 ) 434 fulldest := filepath.Join(c.Rootfs, m.Destination) 435 if m.Data != "" { 436 flags, data, err = mount.ParseTmpfsOptions(m.Data) 437 if err != nil { 438 return err 439 } 440 } 441 container.Mounts = append(container.Mounts, &configs.Mount{ 442 Source: m.Source, 443 Destination: m.Destination, 444 Data: data, 445 Device: "tmpfs", 446 Flags: flags, 447 PremountCmds: genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination), 448 PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination), 449 PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]}, 450 }) 451 continue 452 } 453 flags := syscall.MS_BIND | syscall.MS_REC 454 var pFlag int 455 if !m.Writable { 456 flags |= syscall.MS_RDONLY 457 } 458 459 // Determine property of RootPropagation based on volume 460 // properties. If a volume is shared, then keep root propagtion 461 // shared. This should work for slave and private volumes too. 462 // 463 // For slave volumes, it can be either [r]shared/[r]slave. 464 // 465 // For private volumes any root propagation value should work. 466 467 pFlag = mountPropagationMap[m.Propagation] 468 if pFlag == mount.SHARED || pFlag == mount.RSHARED { 469 if err := ensureShared(m.Source); err != nil { 470 return err 471 } 472 rootpg := container.RootPropagation 473 if rootpg != mount.SHARED && rootpg != mount.RSHARED { 474 execdriver.SetRootPropagation(container, mount.SHARED) 475 } 476 } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE { 477 if err := ensureSharedOrSlave(m.Source); err != nil { 478 return err 479 } 480 rootpg := container.RootPropagation 481 if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { 482 execdriver.SetRootPropagation(container, mount.RSLAVE) 483 } 484 } 485 486 mount := &configs.Mount{ 487 Source: m.Source, 488 Destination: m.Destination, 489 Device: "bind", 490 Flags: flags, 491 } 492 493 if pFlag != 0 { 494 mount.PropagationFlags = []int{pFlag} 495 } 496 497 container.Mounts = append(container.Mounts, mount) 498 } 499 500 checkResetVolumePropagation(container) 501 return nil 502 } 503 504 func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) { 505 container.ProcessLabel = c.ProcessLabel 506 container.MountLabel = c.MountLabel 507 }