github.com/hustcat/docker@v1.3.3-0.20160314103604-901c67a8eeab/daemon/execdriver/native/create.go (about) 1 // +build linux,cgo 2 3 package native 4 5 import ( 6 "fmt" 7 "path/filepath" 8 "strings" 9 "syscall" 10 11 "github.com/docker/docker/daemon/execdriver" 12 "github.com/docker/docker/pkg/mount" 13 "github.com/docker/docker/profiles/seccomp" 14 15 "github.com/docker/docker/volume" 16 "github.com/opencontainers/runc/libcontainer/apparmor" 17 "github.com/opencontainers/runc/libcontainer/configs" 18 "github.com/opencontainers/runc/libcontainer/devices" 19 ) 20 21 // createContainer populates and configures the container type with the 22 // data provided by the execdriver.Command 23 func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) { 24 container = execdriver.InitContainer(c) 25 26 if err := d.createIpc(container, c); err != nil { 27 return nil, err 28 } 29 30 if err := d.createPid(container, c); err != nil { 31 return nil, err 32 } 33 34 if err := d.createUTS(container, c); err != nil { 35 return nil, err 36 } 37 38 if err := d.setupRemappedRoot(container, c); err != nil { 39 return nil, err 40 } 41 42 if err := d.createNetwork(container, c, hooks); err != nil { 43 return nil, err 44 } 45 46 if c.ProcessConfig.Privileged { 47 if !container.Readonlyfs { 48 // clear readonly for /sys 49 for i := range container.Mounts { 50 if container.Mounts[i].Destination == "/sys" { 51 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 52 } 53 } 54 container.ReadonlyPaths = nil 55 } 56 57 // clear readonly for cgroup 58 for i := range container.Mounts { 59 if container.Mounts[i].Device == "cgroup" { 60 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 61 } 62 } 63 64 container.MaskPaths = nil 65 if err := d.setPrivileged(container); err != nil { 66 return nil, err 67 } 68 } else { 69 if err := d.setCapabilities(container, c); err != nil { 70 return nil, err 71 } 72 73 if c.SeccompProfile == "" { 74 container.Seccomp, err = seccomp.GetDefaultProfile() 75 if err != nil { 76 return nil, err 77 } 78 } 79 } 80 // add CAP_ prefix to all caps for new libcontainer update to match 81 // the spec format. 82 for i, s := range container.Capabilities { 83 if !strings.HasPrefix(s, "CAP_") { 84 container.Capabilities[i] = fmt.Sprintf("CAP_%s", s) 85 } 86 } 87 container.AdditionalGroups = c.GroupAdd 88 89 if c.AppArmorProfile != "" { 90 container.AppArmorProfile = c.AppArmorProfile 91 } 92 93 if c.SeccompProfile != "" && c.SeccompProfile != "unconfined" { 94 container.Seccomp, err = seccomp.LoadProfile(c.SeccompProfile) 95 if err != nil { 96 return nil, err 97 } 98 } 99 100 if err := execdriver.SetupCgroups(container, c); err != nil { 101 return nil, err 102 } 103 104 container.OomScoreAdj = c.OomScoreAdj 105 106 if container.Readonlyfs { 107 for i := range container.Mounts { 108 switch container.Mounts[i].Destination { 109 case "/proc", "/dev", "/dev/pts", "/dev/mqueue": 110 continue 111 } 112 container.Mounts[i].Flags |= syscall.MS_RDONLY 113 } 114 115 /* These paths must be remounted as r/o */ 116 container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev") 117 } 118 119 if err := d.setupMounts(container, c); err != nil { 120 return nil, err 121 } 122 123 d.setupLabels(container, c) 124 d.setupRlimits(container, c) 125 126 container.NoNewPrivileges = c.NoNewPrivileges 127 return container, nil 128 } 129 130 func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error { 131 if c.Network == nil { 132 return nil 133 } 134 if c.Network.ContainerID != "" { 135 d.Lock() 136 active := d.activeContainers[c.Network.ContainerID] 137 d.Unlock() 138 139 if active == nil { 140 return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID) 141 } 142 143 state, err := active.State() 144 if err != nil { 145 return err 146 } 147 148 container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET]) 149 return nil 150 } 151 152 if c.Network.NamespacePath != "" { 153 container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath) 154 return nil 155 } 156 // only set up prestart hook if the namespace path is not set (this should be 157 // all cases *except* for --net=host shared networking) 158 container.Hooks = &configs.Hooks{ 159 Prestart: []configs.Hook{ 160 configs.NewFunctionHook(func(s configs.HookState) error { 161 if len(hooks.PreStart) > 0 { 162 for _, fnHook := range hooks.PreStart { 163 // A closed channel for OOM is returned here as it will be 164 // non-blocking and return the correct result when read. 165 chOOM := make(chan struct{}) 166 close(chOOM) 167 if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil { 168 return err 169 } 170 } 171 } 172 return nil 173 }), 174 }, 175 } 176 return nil 177 } 178 179 func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error { 180 if c.Ipc.HostIpc { 181 container.Namespaces.Remove(configs.NEWIPC) 182 return nil 183 } 184 185 if c.Ipc.ContainerID != "" { 186 d.Lock() 187 active := d.activeContainers[c.Ipc.ContainerID] 188 d.Unlock() 189 190 if active == nil { 191 return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID) 192 } 193 194 state, err := active.State() 195 if err != nil { 196 return err 197 } 198 container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC]) 199 } 200 201 return nil 202 } 203 204 func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error { 205 if c.Pid.HostPid { 206 container.Namespaces.Remove(configs.NEWPID) 207 return nil 208 } 209 210 return nil 211 } 212 213 func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error { 214 if c.UTS.HostUTS { 215 container.Namespaces.Remove(configs.NEWUTS) 216 container.Hostname = "" 217 return nil 218 } 219 220 return nil 221 } 222 223 func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error { 224 if c.RemappedRoot.UID == 0 { 225 container.Namespaces.Remove(configs.NEWUSER) 226 return nil 227 } 228 229 // convert the Docker daemon id map to the libcontainer variant of the same struct 230 // this keeps us from having to import libcontainer code across Docker client + daemon packages 231 cuidMaps := []configs.IDMap{} 232 cgidMaps := []configs.IDMap{} 233 for _, idMap := range c.UIDMapping { 234 cuidMaps = append(cuidMaps, configs.IDMap(idMap)) 235 } 236 for _, idMap := range c.GIDMapping { 237 cgidMaps = append(cgidMaps, configs.IDMap(idMap)) 238 } 239 container.UidMappings = cuidMaps 240 container.GidMappings = cgidMaps 241 242 for _, node := range container.Devices { 243 node.Uid = uint32(c.RemappedRoot.UID) 244 node.Gid = uint32(c.RemappedRoot.GID) 245 } 246 // TODO: until a kernel/mount solution exists for handling remount in a user namespace, 247 // we must clear the readonly flag for the cgroups mount (@mrunalp concurs) 248 for i := range container.Mounts { 249 if container.Mounts[i].Device == "cgroup" { 250 container.Mounts[i].Flags &= ^syscall.MS_RDONLY 251 } 252 } 253 254 return nil 255 } 256 257 func (d *Driver) setPrivileged(container *configs.Config) (err error) { 258 container.Capabilities = execdriver.GetAllCapabilities() 259 container.Cgroups.Resources.AllowAllDevices = true 260 261 hostDevices, err := devices.HostDevices() 262 if err != nil { 263 return err 264 } 265 container.Devices = hostDevices 266 267 if apparmor.IsEnabled() { 268 container.AppArmorProfile = "unconfined" 269 } 270 return nil 271 } 272 273 func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) { 274 container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop) 275 return err 276 } 277 278 func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) { 279 if c.Resources == nil { 280 return 281 } 282 283 for _, rlimit := range c.Resources.Rlimits { 284 container.Rlimits = append(container.Rlimits, configs.Rlimit{ 285 Type: rlimit.Type, 286 Hard: rlimit.Hard, 287 Soft: rlimit.Soft, 288 }) 289 } 290 } 291 292 // If rootfs mount propagation is RPRIVATE, that means all the volumes are 293 // going to be private anyway. There is no need to apply per volume 294 // propagation on top. This is just an optimization so that cost of per volume 295 // propagation is paid only if user decides to make some volume non-private 296 // which will force rootfs mount propagation to be non RPRIVATE. 297 func checkResetVolumePropagation(container *configs.Config) { 298 if container.RootPropagation != mount.RPRIVATE { 299 return 300 } 301 for _, m := range container.Mounts { 302 m.PropagationFlags = nil 303 } 304 } 305 306 func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info { 307 for _, m := range mountinfo { 308 if m.Mountpoint == dir { 309 return m 310 } 311 } 312 return nil 313 } 314 315 // Get the source mount point of directory passed in as argument. Also return 316 // optional fields. 317 func getSourceMount(source string) (string, string, error) { 318 // Ensure any symlinks are resolved. 319 sourcePath, err := filepath.EvalSymlinks(source) 320 if err != nil { 321 return "", "", err 322 } 323 324 mountinfos, err := mount.GetMounts() 325 if err != nil { 326 return "", "", err 327 } 328 329 mountinfo := getMountInfo(mountinfos, sourcePath) 330 if mountinfo != nil { 331 return sourcePath, mountinfo.Optional, nil 332 } 333 334 path := sourcePath 335 for { 336 path = filepath.Dir(path) 337 338 mountinfo = getMountInfo(mountinfos, path) 339 if mountinfo != nil { 340 return path, mountinfo.Optional, nil 341 } 342 343 if path == "/" { 344 break 345 } 346 } 347 348 // If we are here, we did not find parent mount. Something is wrong. 349 return "", "", fmt.Errorf("Could not find source mount of %s", source) 350 } 351 352 // Ensure mount point on which path is mounted, is shared. 353 func ensureShared(path string) error { 354 sharedMount := false 355 356 sourceMount, optionalOpts, err := getSourceMount(path) 357 if err != nil { 358 return err 359 } 360 // Make sure source mount point is shared. 361 optsSplit := strings.Split(optionalOpts, " ") 362 for _, opt := range optsSplit { 363 if strings.HasPrefix(opt, "shared:") { 364 sharedMount = true 365 break 366 } 367 } 368 369 if !sharedMount { 370 return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount) 371 } 372 return nil 373 } 374 375 // Ensure mount point on which path is mounted, is either shared or slave. 376 func ensureSharedOrSlave(path string) error { 377 sharedMount := false 378 slaveMount := false 379 380 sourceMount, optionalOpts, err := getSourceMount(path) 381 if err != nil { 382 return err 383 } 384 // Make sure source mount point is shared. 385 optsSplit := strings.Split(optionalOpts, " ") 386 for _, opt := range optsSplit { 387 if strings.HasPrefix(opt, "shared:") { 388 sharedMount = true 389 break 390 } else if strings.HasPrefix(opt, "master:") { 391 slaveMount = true 392 break 393 } 394 } 395 396 if !sharedMount && !slaveMount { 397 return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount) 398 } 399 return nil 400 } 401 402 func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error { 403 userMounts := make(map[string]struct{}) 404 for _, m := range c.Mounts { 405 userMounts[m.Destination] = struct{}{} 406 } 407 408 // Filter out mounts that are overridden by user supplied mounts 409 var defaultMounts []*configs.Mount 410 _, mountDev := userMounts["/dev"] 411 for _, m := range container.Mounts { 412 if _, ok := userMounts[m.Destination]; !ok { 413 if mountDev && strings.HasPrefix(m.Destination, "/dev/") { 414 container.Devices = nil 415 continue 416 } 417 defaultMounts = append(defaultMounts, m) 418 } 419 } 420 container.Mounts = defaultMounts 421 422 mountPropagationMap := map[string]int{ 423 "private": mount.PRIVATE, 424 "rprivate": mount.RPRIVATE, 425 "shared": mount.SHARED, 426 "rshared": mount.RSHARED, 427 "slave": mount.SLAVE, 428 "rslave": mount.RSLAVE, 429 } 430 431 for _, m := range c.Mounts { 432 for _, cm := range container.Mounts { 433 if cm.Destination == m.Destination { 434 return fmt.Errorf("Duplicate mount point '%s'", m.Destination) 435 } 436 } 437 438 if m.Source == "tmpfs" { 439 var ( 440 data = "size=65536k" 441 flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV 442 err error 443 ) 444 if m.Data != "" { 445 flags, data, err = mount.ParseTmpfsOptions(m.Data) 446 if err != nil { 447 return err 448 } 449 } 450 container.Mounts = append(container.Mounts, &configs.Mount{ 451 Source: m.Source, 452 Destination: m.Destination, 453 Data: data, 454 Device: "tmpfs", 455 Flags: flags, 456 PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]}, 457 }) 458 continue 459 } 460 flags := syscall.MS_BIND | syscall.MS_REC 461 var pFlag int 462 if !m.Writable { 463 flags |= syscall.MS_RDONLY 464 } 465 466 // Determine property of RootPropagation based on volume 467 // properties. If a volume is shared, then keep root propagation 468 // shared. This should work for slave and private volumes too. 469 // 470 // For slave volumes, it can be either [r]shared/[r]slave. 471 // 472 // For private volumes any root propagation value should work. 473 474 pFlag = mountPropagationMap[m.Propagation] 475 if pFlag == mount.SHARED || pFlag == mount.RSHARED { 476 if err := ensureShared(m.Source); err != nil { 477 return err 478 } 479 rootpg := container.RootPropagation 480 if rootpg != mount.SHARED && rootpg != mount.RSHARED { 481 execdriver.SetRootPropagation(container, mount.SHARED) 482 } 483 } else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE { 484 if err := ensureSharedOrSlave(m.Source); err != nil { 485 return err 486 } 487 rootpg := container.RootPropagation 488 if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE { 489 execdriver.SetRootPropagation(container, mount.RSLAVE) 490 } 491 } 492 493 mount := &configs.Mount{ 494 Source: m.Source, 495 Destination: m.Destination, 496 Device: "bind", 497 Flags: flags, 498 } 499 500 if pFlag != 0 { 501 mount.PropagationFlags = []int{pFlag} 502 } 503 504 container.Mounts = append(container.Mounts, mount) 505 } 506 507 checkResetVolumePropagation(container) 508 return nil 509 } 510 511 func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) { 512 container.ProcessLabel = c.ProcessLabel 513 container.MountLabel = c.MountLabel 514 }