github.com/ncdc/docker@v0.10.1-0.20160129113957-6c6729ef5b74/daemon/execdriver/native/create.go (about) 1 // +build linux,cgo 2 3 package native 4 5 import ( 6 "fmt" 7 "path/filepath" 8 "strings" 9 "syscall" 10 11 "github.com/docker/docker/daemon/execdriver" 12 derr "github.com/docker/docker/errors" 13 "github.com/docker/docker/pkg/mount" 14 "github.com/docker/docker/profiles/seccomp" 15 16 "github.com/docker/docker/volume" 17 "github.com/opencontainers/runc/libcontainer/apparmor" 18 "github.com/opencontainers/runc/libcontainer/configs" 19 "github.com/opencontainers/runc/libcontainer/devices" 20 ) 21 22 // createContainer populates and configures the container type with the 23 // data provided by the execdriver.Command 24 func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) { 25 container = execdriver.InitContainer(c) 26 27 if err := d.createIpc(container, c); err != nil { 28 return nil, err 29 } 30 31 if err := d.createPid(container, c); err != nil { 32 return nil, err 33 } 34 35 if err := d.createUTS(container, c); err != nil { 36 return nil, err 37 } 38 39 if err := d.setupRemappedRoot(container, c); err != nil { 40 return nil, err 41 } 42 43 if err := d.createNetwork(container, c, hooks); err != nil { 44 return nil, err 45 } 46 47 if c.ProcessConfig.Privileged { 48 if !container.Readonlyfs { 49 // clear readonly for /sys 50 for i := range container.Mounts { 51 if container.Mounts[i].Destination == "/sys" { 52 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 53 } 54 } 55 container.ReadonlyPaths = nil 56 } 57 58 // clear readonly for cgroup 59 for i := range container.Mounts { 60 if container.Mounts[i].Device == "cgroup" { 61 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 62 } 63 } 64 65 container.MaskPaths = nil 66 if err := d.setPrivileged(container); err != nil { 67 return nil, err 68 } 69 } else { 70 if err := d.setCapabilities(container, c); err != nil { 71 return nil, err 72 } 73 74 if c.SeccompProfile == "" { 75 container.Seccomp = seccomp.GetDefaultProfile() 76 } 77 } 78 // add CAP_ prefix to all caps for new libcontainer update to match 79 // the spec format. 80 for i, s := range container.Capabilities { 81 if !strings.HasPrefix(s, "CAP_") { 82 container.Capabilities[i] = fmt.Sprintf("CAP_%s", s) 83 } 84 } 85 container.AdditionalGroups = c.GroupAdd 86 87 if c.AppArmorProfile != "" { 88 container.AppArmorProfile = c.AppArmorProfile 89 } 90 91 if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" { 92 container.Seccomp, err = seccomp.LoadProfile(c.SeccompProfile) 93 if err != nil { 94 return nil, err 95 } 96 } 97 98 if err := execdriver.SetupCgroups(container, c); err != nil { 99 return nil, err 100 } 101 102 container.OomScoreAdj = c.OomScoreAdj 103 104 if container.Readonlyfs { 105 for i := range container.Mounts { 106 switch container.Mounts[i].Destination { 107 case "/proc", "/dev", "/dev/pts": 108 continue 109 } 110 container.Mounts[i].Flags |= syscall.MS_RDONLY 111 } 112 113 /* These paths must be remounted as r/o */ 114 container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev") 115 } 116 117 if err := d.setupMounts(container, c); err != nil { 118 return nil, err 119 } 120 121 d.setupLabels(container, c) 122 d.setupRlimits(container, c) 123 return container, nil 124 } 125 126 func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error { 127 if c.Network == nil { 128 return nil 129 } 130 if c.Network.ContainerID != "" { 131 d.Lock() 132 active := d.activeContainers[c.Network.ContainerID] 133 d.Unlock() 134 135 if active == nil { 136 return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID) 137 } 138 139 state, err := active.State() 140 if err != nil { 141 return err 142 } 143 144 container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET]) 145 return nil 146 } 147 148 if c.Network.NamespacePath != "" { 149 container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath) 150 return nil 151 } 152 // only set up prestart hook if the namespace path is not set (this should be 153 // all cases *except* for --net=host shared networking) 154 container.Hooks = &configs.Hooks{ 155 Prestart: []configs.Hook{ 156 configs.NewFunctionHook(func(s configs.HookState) error { 157 if len(hooks.PreStart) > 0 { 158 for _, fnHook := range hooks.PreStart { 159 // A closed channel for OOM is returned here as it will be 160 // non-blocking and return the correct result when read. 161 chOOM := make(chan struct{}) 162 close(chOOM) 163 if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil { 164 return err 165 } 166 } 167 } 168 return nil 169 }), 170 }, 171 } 172 return nil 173 } 174 175 func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error { 176 if c.Ipc.HostIpc { 177 container.Namespaces.Remove(configs.NEWIPC) 178 return nil 179 } 180 181 if c.Ipc.ContainerID != "" { 182 d.Lock() 183 active := d.activeContainers[c.Ipc.ContainerID] 184 d.Unlock() 185 186 if active == nil { 187 return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) 188 } 189 190 state, err := active.State() 191 if err != nil { 192 return err 193 } 194 container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC]) 195 } 196 197 return nil 198 } 199 200 func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error { 201 if c.Pid.HostPid { 202 container.Namespaces.Remove(configs.NEWPID) 203 return nil 204 } 205 206 return nil 207 } 208 209 func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error { 210 if c.UTS.HostUTS { 211 container.Namespaces.Remove(configs.NEWUTS) 212 container.Hostname = "" 213 return nil 214 } 215 216 return nil 217 } 218 219 func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error { 220 if c.RemappedRoot.UID == 0 { 221 container.Namespaces.Remove(configs.NEWUSER) 222 return nil 223 } 224 225 // convert the Docker daemon id map to the libcontainer variant of the same struct 226 // this keeps us from having to import libcontainer code across Docker client + daemon packages 227 cuidMaps := []configs.IDMap{} 228 cgidMaps := []configs.IDMap{} 229 for _, idMap := range c.UIDMapping { 230 cuidMaps = append(cuidMaps, configs.IDMap(idMap)) 231 } 232 for _, idMap := range c.GIDMapping { 233 cgidMaps = append(cgidMaps, configs.IDMap(idMap)) 234 } 235 container.UidMappings = cuidMaps 236 container.GidMappings = cgidMaps 237 238 for _, node := range container.Devices { 239 node.Uid = uint32(c.RemappedRoot.UID) 240 node.Gid = uint32(c.RemappedRoot.GID) 241 } 242 // TODO: until a kernel/mount solution exists for handling remount in a user namespace, 243 // we must clear the readonly flag for the cgroups mount (@mrunalp concurs) 244 for i := range container.Mounts { 245 if container.Mounts[i].Device == "cgroup" { 246 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 247 } 248 } 249 250 return nil 251 } 252 253 func (d *Driver) setPrivileged(container *configs.Config) (err error) { 254 container.Capabilities = execdriver.GetAllCapabilities() 255 container.Cgroups.Resources.AllowAllDevices = true 256 257 hostDevices, err := devices.HostDevices() 258 if err != nil { 259 return err 260 } 261 container.Devices = hostDevices 262 263 if apparmor.IsEnabled() { 264 container.AppArmorProfile = "unconfined" 265 } 266 return nil 267 } 268 269 func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) { 270 container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) 271 return err 272 } 273 274 func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) { 275 if c.Resources == nil { 276 return 277 } 278 279 for _, rlimit := range c.Resources.Rlimits { 280 container.Rlimits = append(container.Rlimits, configs.Rlimit{ 281 Type: rlimit.Type, 282 Hard: rlimit.Hard, 283 Soft: rlimit.Soft, 284 }) 285 } 286 } 287 288 // If rootfs mount propagation is RPRIVATE, that means all the volumes are 289 // going to be private anyway. There is no need to apply per volume 290 // propagation on top. This is just an optimization so that cost of per volume 291 // propagation is paid only if user decides to make some volume non-private 292 // which will force rootfs mount propagation to be non RPRIVATE. 293 func checkResetVolumePropagation(container *configs.Config) { 294 if container.RootPropagation != mount.RPRIVATE { 295 return 296 } 297 for _, m := range container.Mounts { 298 m.PropagationFlags = nil 299 } 300 } 301 302 func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { 303 for _, m := range mountinfo { 304 if m.Mountpoint == dir { 305 return m 306 } 307 } 308 return nil 309 } 310 311 // Get the source mount point of directory passed in as argument. Also return 312 // optional fields. 313 func getSourceMount(source string) (string, string, error) { 314 // Ensure any symlinks are resolved. 315 sourcePath, err := filepath.EvalSymlinks(source) 316 if err != nil { 317 return "", "", err 318 } 319 320 mountinfos, err := mount.GetMounts() 321 if err != nil { 322 return "", "", err 323 } 324 325 mountinfo := getMountInfo(mountinfos, sourcePath) 326 if mountinfo != nil { 327 return sourcePath, mountinfo.Optional, nil 328 } 329 330 path := sourcePath 331 for { 332 path = filepath.Dir(path) 333 334 mountinfo = getMountInfo(mountinfos, path) 335 if mountinfo != nil { 336 return path, mountinfo.Optional, nil 337 } 338 339 if path == "/" { 340 break 341 } 342 } 343 344 // If we are here, we did not find parent mount. Something is wrong. 345 return "", "", fmt.Errorf("Could not find source mount of %s", source) 346 } 347 348 // Ensure mount point on which path is mounted, is shared. 349 func ensureShared(path string) error { 350 sharedMount := false 351 352 sourceMount, optionalOpts, err := getSourceMount(path) 353 if err != nil { 354 return err 355 } 356 // Make sure source mount point is shared. 357 optsSplit := strings.Split(optionalOpts, " ") 358 for _, opt := range optsSplit { 359 if strings.HasPrefix(opt, "shared:") { 360 sharedMount = true 361 break 362 } 363 } 364 365 if !sharedMount { 366 return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount) 367 } 368 return nil 369 } 370 371 // Ensure mount point on which path is mounted, is either shared or slave. 372 func ensureSharedOrSlave(path string) error { 373 sharedMount := false 374 slaveMount := false 375 376 sourceMount, optionalOpts, err := getSourceMount(path) 377 if err != nil { 378 return err 379 } 380 // Make sure source mount point is shared. 381 optsSplit := strings.Split(optionalOpts, " ") 382 for _, opt := range optsSplit { 383 if strings.HasPrefix(opt, "shared:") { 384 sharedMount = true 385 break 386 } else if strings.HasPrefix(opt, "master:") { 387 slaveMount = true 388 break 389 } 390 } 391 392 if !sharedMount && !slaveMount { 393 return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount) 394 } 395 return nil 396 } 397 398 func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error { 399 userMounts := make(map[string]struct{}) 400 for _, m := range c.Mounts { 401 userMounts[m.Destination] = struct{}{} 402 } 403 404 // Filter out mounts that are overridden by user supplied mounts 405 var defaultMounts []*configs.Mount 406 _, mountDev := userMounts["/dev"] 407 for _, m := range container.Mounts { 408 if _, ok := userMounts[m.Destination]; !ok { 409 if mountDev && strings.HasPrefix(m.Destination, "/dev/") { 410 container.Devices = nil 411 continue 412 } 413 defaultMounts = append(defaultMounts, m) 414 } 415 } 416 container.Mounts = defaultMounts 417 418 mountPropagationMap := map[string]int{ 419 "private": mount.PRIVATE, 420 "rprivate": mount.RPRIVATE, 421 "shared": mount.SHARED, 422 "rshared": mount.RSHARED, 423 "slave": mount.SLAVE, 424 "rslave": mount.RSLAVE, 425 } 426 427 for _, m := range c.Mounts { 428 for _, cm := range container.Mounts { 429 if cm.Destination == m.Destination { 430 return derr.ErrorCodeMountDup.WithArgs(m.Destination) 431 } 432 } 433 434 if m.Source == "tmpfs" { 435 var ( 436 data = "size=65536k" 437 flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV 438 err error 439 ) 440 if m.Data != "" { 441 flags, data, err = mount.ParseTmpfsOptions(m.Data) 442 if err != nil { 443 return err 444 } 445 } 446 container.Mounts = append(container.Mounts, &configs.Mount{ 447 Source: m.Source, 448 Destination: m.Destination, 449 Data: data, 450 Device: "tmpfs", 451 Flags: flags, 452 PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]}, 453 }) 454 continue 455 } 456 flags := syscall.MS_BIND | syscall.MS_REC 457 var pFlag int 458 if !m.Writable { 459 flags |= syscall.MS_RDONLY 460 } 461 462 // Determine property of RootPropagation based on volume 463 // properties. If a volume is shared, then keep root propagation 464 // shared. This should work for slave and private volumes too. 465 // 466 // For slave volumes, it can be either [r]shared/[r]slave. 467 // 468 // For private volumes any root propagation value should work. 469 470 pFlag = mountPropagationMap[m.Propagation] 471 if pFlag == mount.SHARED || pFlag == mount.RSHARED { 472 if err := ensureShared(m.Source); err != nil { 473 return err 474 } 475 rootpg := container.RootPropagation 476 if rootpg != mount.SHARED && rootpg != mount.RSHARED { 477 execdriver.SetRootPropagation(container, mount.SHARED) 478 } 479 } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE { 480 if err := ensureSharedOrSlave(m.Source); err != nil { 481 return err 482 } 483 rootpg := container.RootPropagation 484 if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { 485 execdriver.SetRootPropagation(container, mount.RSLAVE) 486 } 487 } 488 489 mount := &configs.Mount{ 490 Source: m.Source, 491 Destination: m.Destination, 492 Device: "bind", 493 Flags: flags, 494 } 495 496 if pFlag != 0 { 497 mount.PropagationFlags = []int{pFlag} 498 } 499 500 container.Mounts = append(container.Mounts, mount) 501 } 502 503 checkResetVolumePropagation(container) 504 return nil 505 } 506 507 func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) { 508 container.ProcessLabel = c.ProcessLabel 509 container.MountLabel = c.MountLabel 510 }