github.com/apptainer/singularity@v3.1.1+incompatible/internal/pkg/runtime/engines/singularity/container.go (about) 1 // Copyright (c) 2018-2019, Sylabs Inc. All rights reserved. 2 // This software is licensed under a 3-clause BSD license. Please consult the 3 // LICENSE.md file distributed with the sources of this project regarding your 4 // rights to use or distribute this software. 5 6 package singularity 7 8 import ( 9 "fmt" 10 "io/ioutil" 11 "os" 12 "path/filepath" 13 "strconv" 14 "strings" 15 "syscall" 16 17 "github.com/sylabs/singularity/internal/pkg/util/mainthread" 18 19 specs "github.com/opencontainers/runtime-spec/specs-go" 20 "github.com/sylabs/singularity/internal/pkg/buildcfg" 21 "github.com/sylabs/singularity/internal/pkg/cgroups" 22 "github.com/sylabs/singularity/internal/pkg/runtime/engines/singularity/rpc/client" 23 "github.com/sylabs/singularity/internal/pkg/sylog" 24 "github.com/sylabs/singularity/internal/pkg/util/fs" 25 "github.com/sylabs/singularity/internal/pkg/util/fs/files" 26 "github.com/sylabs/singularity/internal/pkg/util/fs/layout" 27 "github.com/sylabs/singularity/internal/pkg/util/fs/layout/layer/overlay" 28 "github.com/sylabs/singularity/internal/pkg/util/fs/layout/layer/underlay" 29 "github.com/sylabs/singularity/internal/pkg/util/fs/mount" 30 "github.com/sylabs/singularity/internal/pkg/util/user" 31 "github.com/sylabs/singularity/pkg/image" 32 "github.com/sylabs/singularity/pkg/network" 33 "github.com/sylabs/singularity/pkg/util/fs/proc" 34 "github.com/sylabs/singularity/pkg/util/loop" 35 "golang.org/x/crypto/ssh/terminal" 36 ) 37 38 // defaultCNIConfPath is the default directory to CNI network configuration files 39 var defaultCNIConfPath = filepath.Join(buildcfg.SYSCONFDIR, "singularity", "network") 40 41 // defaultCNIPluginPath is the default directory to CNI plugins executables 42 var defaultCNIPluginPath = filepath.Join(buildcfg.LIBEXECDIR, "singularity", "cni") 43 44 type container struct { 45 engine *EngineOperations 46 rpcOps *client.RPC 47 session *layout.Session 48 sessionLayerType string 49 sessionFsType string 50 sessionSize int 51 userNS bool 52 pidNS bool 53 utsNS bool 54 netNS bool 55 ipcNS bool 56 mountInfoPath string 57 skippedMount []string 58 checkDest []string 59 suidFlag uintptr 60 devSourcePath string 61 } 62 63 func create(engine *EngineOperations, rpcOps *client.RPC, pid int) error { 64 var err error 65 66 c := &container{ 67 engine: engine, 68 rpcOps: rpcOps, 69 sessionLayerType: "none", 70 sessionFsType: engine.EngineConfig.File.MemoryFSType, 71 mountInfoPath: fmt.Sprintf("/proc/%d/mountinfo", pid), 72 skippedMount: make([]string, 0), 73 checkDest: make([]string, 0), 74 suidFlag: syscall.MS_NOSUID, 75 } 76 77 cwd := engine.EngineConfig.GetCwd() 78 if err := os.Chdir(cwd); err != nil { 79 return fmt.Errorf("can't change directory to %s: %s", cwd, err) 80 } 81 82 if engine.EngineConfig.OciConfig.Linux != nil { 83 for _, namespace := range engine.EngineConfig.OciConfig.Linux.Namespaces { 84 switch namespace.Type { 85 case specs.UserNamespace: 86 c.userNS = true 87 case specs.PIDNamespace: 88 c.pidNS = true 89 case specs.UTSNamespace: 90 c.utsNS = true 91 case specs.NetworkNamespace: 92 c.netNS = true 93 case specs.IPCNamespace: 94 c.ipcNS = true 95 } 96 } 97 } 98 99 if os.Geteuid() != 0 { 100 c.sessionSize = int(engine.EngineConfig.File.SessiondirMaxSize) 101 } else if engine.EngineConfig.GetAllowSUID() && !c.userNS { 102 c.suidFlag = 0 103 } 104 105 p := &mount.Points{} 106 system := &mount.System{Points: p, Mount: c.mount} 107 108 if err := c.setupSessionLayout(system); err != nil { 109 return err 110 } 111 112 if err := system.RunAfterTag(mount.LayerTag, c.addIdentityMount); err != nil { 113 return err 114 } 115 if err := system.RunAfterTag(mount.RootfsTag, c.addActionsMount); err != nil { 116 return err 117 } 118 119 if err := c.addRootfsMount(system); err != nil { 120 return err 121 } 122 if err := c.addKernelMount(system); err != nil { 123 return err 124 } 125 if err := c.addDevMount(system); err != nil { 126 return err 127 } 128 if err := c.addHostMount(system); err != nil { 129 return err 130 } 131 if err := c.addBindsMount(system); err != nil { 132 return err 133 } 134 if err := c.addHomeMount(system); err != nil { 135 return err 136 } 137 if err := c.addUserbindsMount(system); err != nil { 138 return err 139 } 140 if err := c.addTmpMount(system); err != nil { 141 return err 142 } 143 if err := c.addScratchMount(system); err != nil { 144 return err 145 } 146 if err := c.addCwdMount(system); err != nil { 147 return err 148 } 149 if err := c.addLibsMount(system); err != nil { 150 return err 151 } 152 if err := c.addResolvConfMount(system); err != nil { 153 return err 154 } 155 if err := c.addHostnameMount(system); err != nil { 156 return err 157 } 158 159 sylog.Debugf("Mount all") 160 if err := system.MountAll(); err != nil { 161 return err 162 } 163 164 sylog.Debugf("Chroot into %s\n", c.session.FinalPath()) 165 _, err = c.rpcOps.Chroot(c.session.FinalPath(), "pivot") 166 if err != nil { 167 sylog.Debugf("Fallback to move/chroot") 168 _, err = c.rpcOps.Chroot(c.session.FinalPath(), "move") 169 if err != nil { 170 return fmt.Errorf("chroot failed: %s", err) 171 } 172 } 173 174 if c.netNS { 175 if os.Geteuid() == 0 && !c.userNS { 176 /* hold a reference to container network namespace for cleanup */ 177 f, err := syscall.Open("/proc/"+strconv.Itoa(pid)+"/ns/net", os.O_RDONLY, 0) 178 if err != nil { 179 return fmt.Errorf("can't open network namespace: %s", err) 180 } 181 nspath := fmt.Sprintf("/proc/%d/fd/%d", os.Getpid(), f) 182 networks := strings.Split(engine.EngineConfig.GetNetwork(), ",") 183 184 cniPath := &network.CNIPath{} 185 186 if engine.EngineConfig.File.CniConfPath != "" { 187 cniPath.Conf = engine.EngineConfig.File.CniConfPath 188 } else { 189 cniPath.Conf = defaultCNIConfPath 190 } 191 if engine.EngineConfig.File.CniPluginPath != "" { 192 cniPath.Plugin = engine.EngineConfig.File.CniPluginPath 193 } else { 194 cniPath.Plugin = defaultCNIPluginPath 195 } 196 197 setup, err := network.NewSetup(networks, strconv.Itoa(pid), nspath, cniPath) 198 if err != nil { 199 return fmt.Errorf("%s", err) 200 } 201 netargs := engine.EngineConfig.GetNetworkArgs() 202 if err := setup.SetArgs(netargs); err != nil { 203 return fmt.Errorf("%s", err) 204 } 205 206 setup.SetEnvPath("/bin:/sbin:/usr/bin:/usr/sbin") 207 208 if err := setup.AddNetworks(); err != nil { 209 return fmt.Errorf("%s", err) 210 } 211 212 engine.EngineConfig.Network = setup 213 } else if engine.EngineConfig.GetNetwork() != "none" { 214 return fmt.Errorf("Network requires root permissions or --network=none argument as user") 215 } 216 } 217 218 if os.Geteuid() == 0 { 219 path := engine.EngineConfig.GetCgroupsPath() 220 if path != "" { 221 cgroupPath := filepath.Join("/singularity", strconv.Itoa(pid)) 222 manager := &cgroups.Manager{Pid: pid, Path: cgroupPath} 223 if err := manager.ApplyFromFile(path); err != nil { 224 return fmt.Errorf("Failed to apply cgroups ressources restriction: %s", err) 225 } 226 engine.EngineConfig.Cgroups = manager 227 } 228 } 229 230 sylog.Debugf("Chdir into / to avoid errors\n") 231 err = syscall.Chdir("/") 232 if err != nil { 233 return fmt.Errorf("change directory failed: %s", err) 234 } 235 236 return nil 237 } 238 239 func (c *container) setupSIFOverlay(img *image.Image, writable bool) error { 240 // Determine if overlay partitions exists 241 overlayPart := 0 242 overlayImg := c.engine.EngineConfig.GetOverlayImage() 243 imglist := c.engine.EngineConfig.GetImageList() 244 245 for _, p := range img.Partitions[1:] { 246 if p.Type == image.EXT3 || p.Type == image.SQUASHFS { 247 imgCopy := *img 248 imgCopy.Type = int(p.Type) 249 imgCopy.Partitions = []image.Section{p} 250 imglist = append(imglist, imgCopy) 251 overlayImg = append(overlayImg, imgCopy.Path) 252 overlayPart++ 253 } 254 } 255 256 c.engine.EngineConfig.SetOverlayImage(overlayImg) 257 c.engine.EngineConfig.SetImageList(imglist) 258 259 if overlayPart == 0 && writable { 260 return fmt.Errorf("no SIF writable overlay partition found") 261 } 262 263 return nil 264 } 265 266 // setupSessionLayout will create the session layout according to the capabilities of Singularity 267 // on the system. It will first attempt to use "overlay", followed by "underlay", and if neither 268 // are available it will not use either. If neither are used, we will not be able to bind mount 269 // to non-existent paths within the container 270 func (c *container) setupSessionLayout(system *mount.System) error { 271 writableTmpfs := c.engine.EngineConfig.GetWritableTmpfs() 272 overlayEnabled := false 273 274 sessionPath, err := filepath.EvalSymlinks(buildcfg.SESSIONDIR) 275 if err != nil { 276 return fmt.Errorf("failed to resolved session directory %s: %s", buildcfg.SESSIONDIR, err) 277 } 278 279 if enabled, _ := proc.HasFilesystem("overlay"); enabled && !c.userNS { 280 switch c.engine.EngineConfig.File.EnableOverlay { 281 case "yes", "try": 282 overlayEnabled = true 283 } 284 } 285 286 imgObject, err := c.loadImage(c.engine.EngineConfig.GetImage(), true) 287 if err != nil { 288 return fmt.Errorf("while loading image object: %s", err) 289 } 290 291 if c.engine.EngineConfig.GetWritableImage() && !writableTmpfs { 292 sylog.Debugf("Image is writable, not attempting to use overlay or underlay\n") 293 if imgObject.Type == image.SIF { 294 err = c.setupSIFOverlay(imgObject, c.engine.EngineConfig.GetWritableImage()) 295 if err == nil { 296 return c.setupOverlayLayout(system, sessionPath) 297 } 298 sylog.Warningf("While attempting to set up SIFOverlay: %s", err) 299 } 300 return c.setupDefaultLayout(system, sessionPath) 301 } 302 303 if overlayEnabled { 304 sylog.Debugf("Attempting to use overlayfs (enable overlay = %v)\n", c.engine.EngineConfig.File.EnableOverlay) 305 if imgObject.Type == image.SIF { 306 err = c.setupSIFOverlay(imgObject, c.engine.EngineConfig.GetWritableImage()) 307 if err == nil { 308 return c.setupOverlayLayout(system, sessionPath) 309 } 310 sylog.Warningf("While attempting to set up SIFOverlay: %s", err) 311 } 312 return c.setupOverlayLayout(system, sessionPath) 313 } 314 315 if writableTmpfs { 316 sylog.Warningf("Ignoring --writable-tmpfs as it requires overlay support") 317 } 318 319 if c.engine.EngineConfig.File.EnableUnderlay { 320 sylog.Debugf("Attempting to use underlay (enable underlay = yes)\n") 321 return c.setupUnderlayLayout(system, sessionPath) 322 } 323 324 sylog.Debugf("Not attempting to use underlay or overlay\n") 325 return c.setupDefaultLayout(system, sessionPath) 326 } 327 328 // setupOverlayLayout sets up the session with overlay filesystem 329 func (c *container) setupOverlayLayout(system *mount.System, sessionPath string) (err error) { 330 sylog.Debugf("Creating overlay SESSIONDIR layout\n") 331 if c.session, err = layout.NewSession(sessionPath, c.sessionFsType, c.sessionSize, system, overlay.New()); err != nil { 332 return err 333 } 334 335 if err := c.addOverlayMount(system); err != nil { 336 return err 337 } 338 339 c.sessionLayerType = "overlay" 340 return system.RunAfterTag(mount.LayerTag, c.setPropagationMount) 341 } 342 343 // setupUnderlayLayout sets up the session with underlay "filesystem" 344 func (c *container) setupUnderlayLayout(system *mount.System, sessionPath string) (err error) { 345 sylog.Debugf("Creating underlay SESSIONDIR layout\n") 346 if c.session, err = layout.NewSession(sessionPath, c.sessionFsType, c.sessionSize, system, underlay.New()); err != nil { 347 return err 348 } 349 350 c.sessionLayerType = "underlay" 351 return system.RunAfterTag(mount.LayerTag, c.setPropagationMount) 352 } 353 354 // setupDefaultLayout sets up the session without overlay or underlay 355 func (c *container) setupDefaultLayout(system *mount.System, sessionPath string) (err error) { 356 sylog.Debugf("Creating default SESSIONDIR layout\n") 357 if c.session, err = layout.NewSession(sessionPath, c.sessionFsType, c.sessionSize, system, nil); err != nil { 358 return err 359 } 360 361 c.sessionLayerType = "none" 362 return system.RunAfterTag(mount.RootfsTag, c.setPropagationMount) 363 } 364 365 // isLayerEnabled returns whether or not overlay or underlay system 366 // is enabled 367 func (c *container) isLayerEnabled() bool { 368 sylog.Debugf("Using Layer system: %v\n", c.sessionLayerType) 369 if c.sessionLayerType == "none" { 370 return false 371 } 372 373 return true 374 } 375 376 func (c *container) mount(point *mount.Point) error { 377 if _, err := mount.GetOffset(point.InternalOptions); err == nil { 378 if err := c.mountImage(point); err != nil { 379 return fmt.Errorf("can't mount image %s: %s", point.Source, err) 380 } 381 } else { 382 if err := c.mountGeneric(point); err != nil { 383 flags, _ := mount.ConvertOptions(point.Options) 384 if flags&syscall.MS_REMOUNT != 0 { 385 return fmt.Errorf("can't remount %s: %s", point.Destination, err) 386 } 387 if point.Type != "" { 388 if point.Source == "devpts" { 389 sylog.Verbosef("Couldn't mount devpts filesystem, continuing with PTY allocation functionality disabled") 390 } else { 391 // mount error for other filesystems is considered fatal 392 return fmt.Errorf("can't mount %s filesystem to %s: %s", point.Type, point.Destination, err) 393 } 394 } 395 sylog.Verbosef("can't mount %s: %s", point.Source, err) 396 return nil 397 } 398 } 399 return nil 400 } 401 402 func (c *container) setPropagationMount(system *mount.System) error { 403 pflags := uintptr(syscall.MS_REC) 404 405 if c.engine.EngineConfig.File.MountSlave { 406 sylog.Debugf("Set RPC mount propagation flag to SLAVE") 407 pflags |= syscall.MS_SLAVE 408 } else { 409 sylog.Debugf("Set RPC mount propagation flag to PRIVATE") 410 pflags |= syscall.MS_PRIVATE 411 } 412 413 if _, err := c.rpcOps.Mount("", "/", "", pflags, ""); err != nil { 414 return err 415 } 416 return nil 417 } 418 419 func (c *container) checkMounted(dest string) string { 420 if dest[0] != '/' { 421 return "" 422 } 423 424 minfo, err := proc.ParseMountInfo(c.mountInfoPath) 425 if err != nil { 426 return "" 427 } 428 429 p, err := filepath.EvalSymlinks(dest) 430 if err != nil { 431 return "" 432 } 433 434 finalPath := c.session.FinalPath() 435 rootfsPath := c.session.RootFsPath() 436 sessionPath := c.session.Path() 437 438 for { 439 if p == finalPath || p == rootfsPath || p == sessionPath || p == "/" { 440 break 441 } 442 for _, childs := range minfo { 443 for _, child := range childs { 444 if p == child { 445 return child 446 } 447 } 448 } 449 p = filepath.Dir(p) 450 } 451 return "" 452 } 453 454 // mount any generic mount (not loop dev) 455 func (c *container) mountGeneric(mnt *mount.Point) (err error) { 456 flags, opts := mount.ConvertOptions(mnt.Options) 457 optsString := strings.Join(opts, ",") 458 sessionPath := c.session.Path() 459 remount := mount.HasRemountFlag(flags) 460 propagation := mount.HasPropagationFlag(flags) 461 source := mnt.Source 462 dest := "" 463 464 if flags&syscall.MS_BIND != 0 && !remount { 465 if _, err := os.Stat(source); os.IsNotExist(err) { 466 c.skippedMount = append(c.skippedMount, mnt.Destination) 467 sylog.Debugf("Skipping mount, host source %s doesn't exist", source) 468 return nil 469 } 470 } 471 472 if !strings.HasPrefix(mnt.Destination, sessionPath) { 473 dest = fs.EvalRelative(mnt.Destination, c.session.FinalPath()) 474 475 dest = filepath.Join(c.session.FinalPath(), dest) 476 477 if _, err := os.Stat(dest); os.IsNotExist(err) { 478 c.skippedMount = append(c.skippedMount, mnt.Destination) 479 sylog.Debugf("Skipping mount, %s doesn't exist in container", dest) 480 return nil 481 } 482 } else { 483 dest = mnt.Destination 484 if _, err := os.Stat(dest); os.IsNotExist(err) { 485 return fmt.Errorf("destination %s doesn't exist", dest) 486 } 487 } 488 489 if remount || propagation { 490 for _, skipped := range c.skippedMount { 491 if skipped == mnt.Destination { 492 return nil 493 } 494 } 495 sylog.Debugf("Remounting %s\n", dest) 496 } else { 497 for _, d := range c.checkDest { 498 if d == mnt.Destination { 499 mounted := c.checkMounted(dest) 500 if mounted != "" { 501 c.skippedMount = append(c.skippedMount, mnt.Destination) 502 sylog.Debugf("Skipping mount %s, %s already mounted", dest, mounted) 503 return nil 504 } 505 break 506 } 507 } 508 sylog.Debugf("Mounting %s to %s\n", source, dest) 509 510 // in stage 1 we changed current working directory to 511 // sandbox image directory, just pass "." as source argument to 512 // be sure RPC mount the right sandbox image 513 if dest == c.session.RootFsPath() && flags&syscall.MS_BIND != 0 { 514 source = "." 515 } 516 517 // overlay requires root filesystem UID/GID since upper/work 518 // directories are owned by root 519 if mnt.Type == "overlay" { 520 c.rpcOps.SetFsID(0, 0) 521 defer c.rpcOps.SetFsID(os.Getuid(), os.Getgid()) 522 } 523 } 524 _, err = c.rpcOps.Mount(source, dest, mnt.Type, flags, optsString) 525 return err 526 } 527 528 // mount image via loop 529 func (c *container) mountImage(mnt *mount.Point) error { 530 maxDevices := int(c.engine.EngineConfig.File.MaxLoopDevices) 531 flags, opts := mount.ConvertOptions(mnt.Options) 532 optsString := strings.Join(opts, ",") 533 534 offset, err := mount.GetOffset(mnt.InternalOptions) 535 if err != nil { 536 return err 537 } 538 539 sizelimit, err := mount.GetSizeLimit(mnt.InternalOptions) 540 if err != nil { 541 return err 542 } 543 544 attachFlag := os.O_RDWR 545 loopFlags := uint32(loop.FlagsAutoClear) 546 547 if flags&syscall.MS_RDONLY == 1 { 548 loopFlags |= loop.FlagsReadOnly 549 attachFlag = os.O_RDONLY 550 } 551 552 info := &loop.Info64{ 553 Offset: offset, 554 SizeLimit: sizelimit, 555 Flags: loopFlags, 556 } 557 558 shared := c.engine.EngineConfig.File.SharedLoopDevices 559 number, err := c.rpcOps.LoopDevice(mnt.Source, attachFlag, *info, maxDevices, shared) 560 if err != nil { 561 return fmt.Errorf("failed to find loop device: %s", err) 562 } 563 564 path := fmt.Sprintf("/dev/loop%d", number) 565 sylog.Debugf("Mounting loop device %s to %s\n", path, mnt.Destination) 566 _, err = c.rpcOps.Mount(path, mnt.Destination, mnt.Type, flags, optsString) 567 if err != nil { 568 return fmt.Errorf("failed to mount %s filesystem: %s", mnt.Type, err) 569 } 570 571 return nil 572 } 573 574 func (c *container) loadImage(path string, rootfs bool) (*image.Image, error) { 575 list := c.engine.EngineConfig.GetImageList() 576 577 if len(list) == 0 { 578 return nil, fmt.Errorf("no root filesystem found in %s", path) 579 } 580 581 if rootfs { 582 img := list[0] 583 if img.File == nil { 584 return &img, nil 585 } 586 img.File = os.NewFile(img.Fd, img.Path) 587 if img.File == nil { 588 return nil, fmt.Errorf("can't find image %s", path) 589 } 590 return &img, nil 591 } 592 for _, img := range list[1:] { 593 p, err := image.ResolvePath(path) 594 if err != nil { 595 return nil, err 596 } 597 if p == img.Path { 598 if img.File == nil { 599 return &img, nil 600 } 601 img.File = os.NewFile(img.Fd, img.Path) 602 if img.File == nil { 603 return nil, fmt.Errorf("can't find image %s", path) 604 } 605 return &img, nil 606 } 607 } 608 609 return nil, fmt.Errorf("no image found with path %s", path) 610 } 611 612 func (c *container) addRootfsMount(system *mount.System) error { 613 flags := uintptr(c.suidFlag | syscall.MS_NODEV) 614 rootfs := c.engine.EngineConfig.GetImage() 615 616 imageObject, err := c.loadImage(rootfs, true) 617 if err != nil { 618 return err 619 } 620 621 if !imageObject.Writable { 622 sylog.Debugf("Mount rootfs in read-only mode") 623 flags |= syscall.MS_RDONLY 624 } else { 625 sylog.Debugf("Mount rootfs in read-write mode") 626 } 627 628 mountType := "" 629 offset := imageObject.Partitions[0].Offset 630 size := imageObject.Partitions[0].Size 631 632 switch imageObject.Partitions[0].Type { 633 case image.SQUASHFS: 634 mountType = "squashfs" 635 case image.EXT3: 636 mountType = "ext3" 637 case image.SANDBOX: 638 sylog.Debugf("Mounting directory rootfs: %v\n", rootfs) 639 flags |= syscall.MS_BIND 640 if err := system.Points.AddBind(mount.RootfsTag, rootfs, c.session.RootFsPath(), flags); err != nil { 641 return err 642 } 643 if !c.userNS { 644 system.Points.AddRemount(mount.RootfsTag, c.session.RootFsPath(), flags) 645 } 646 return nil 647 } 648 649 sylog.Debugf("Mounting block [%v] image: %v\n", mountType, rootfs) 650 if err := system.Points.AddImage( 651 mount.RootfsTag, 652 imageObject.Source, 653 c.session.RootFsPath(), 654 mountType, 655 flags, 656 offset, 657 size, 658 ); err != nil { 659 return err 660 } 661 662 if imageObject.Writable { 663 return system.Points.AddPropagation(mount.DevTag, c.session.RootFsPath(), syscall.MS_UNBINDABLE) 664 } 665 666 return nil 667 } 668 669 func (c *container) overlayUpperWork(system *mount.System) error { 670 ov := c.session.Layer.(*overlay.Overlay) 671 672 u := ov.GetUpperDir() 673 w := ov.GetWorkDir() 674 675 if fs.IsLink(u) { 676 return fmt.Errorf("symlink detected, upper overlay %s must be a directory", u) 677 } 678 if fs.IsLink(w) { 679 return fmt.Errorf("symlink detected, work overlay %s must be a directory", w) 680 } 681 682 c.rpcOps.SetFsID(0, 0) 683 defer c.rpcOps.SetFsID(os.Getuid(), os.Getgid()) 684 685 if !fs.IsDir(u) { 686 if _, err := c.rpcOps.Mkdir(u, 0755); err != nil { 687 return fmt.Errorf("failed to create %s directory: %s", u, err) 688 } 689 } 690 if !fs.IsDir(w) { 691 if _, err := c.rpcOps.Mkdir(w, 0755); err != nil { 692 return fmt.Errorf("failed to create %s directory: %s", w, err) 693 } 694 } 695 696 return nil 697 } 698 699 func (c *container) addOverlayMount(system *mount.System) error { 700 nb := 0 701 ov := c.session.Layer.(*overlay.Overlay) 702 hasUpper := false 703 704 if c.engine.EngineConfig.GetWritableTmpfs() { 705 sylog.Debugf("Setup writable tmpfs overlay") 706 707 if err := c.session.AddDir("/tmpfs/upper"); err != nil { 708 return err 709 } 710 if err := c.session.AddDir("/tmpfs/work"); err != nil { 711 return err 712 } 713 714 upper, _ := c.session.GetPath("/tmpfs/upper") 715 work, _ := c.session.GetPath("/tmpfs/work") 716 717 if err := ov.SetUpperDir(upper); err != nil { 718 return fmt.Errorf("failed to add overlay upper: %s", err) 719 } 720 if err := ov.SetWorkDir(work); err != nil { 721 return fmt.Errorf("failed to add overlay upper: %s", err) 722 } 723 724 tmpfsPath := filepath.Dir(upper) 725 726 flags := uintptr(c.suidFlag | syscall.MS_NODEV) 727 728 if err := system.Points.AddBind(mount.PreLayerTag, tmpfsPath, tmpfsPath, flags); err != nil { 729 return fmt.Errorf("failed to add %s temporary filesystem: %s", tmpfsPath, err) 730 } 731 732 if err := system.Points.AddRemount(mount.PreLayerTag, tmpfsPath, flags); err != nil { 733 return fmt.Errorf("failed to add %s temporary filesystem: %s", tmpfsPath, err) 734 } 735 736 hasUpper = true 737 } 738 739 for _, img := range c.engine.EngineConfig.GetOverlayImage() { 740 splitted := strings.SplitN(img, ":", 2) 741 742 imageObject, err := c.loadImage(splitted[0], false) 743 if err != nil { 744 return fmt.Errorf("failed to open overlay image %s: %s", splitted[0], err) 745 } 746 747 sessionDest := fmt.Sprintf("/overlay-images/%d", nb) 748 if err := c.session.AddDir(sessionDest); err != nil { 749 return fmt.Errorf("failed to create session directory for overlay: %s", err) 750 } 751 dst, _ := c.session.GetPath(sessionDest) 752 nb++ 753 754 src := imageObject.Source 755 offset := imageObject.Partitions[0].Offset 756 size := imageObject.Partitions[0].Size 757 758 switch imageObject.Type { 759 case image.EXT3: 760 flags := uintptr(c.suidFlag | syscall.MS_NODEV) 761 762 if !imageObject.Writable { 763 flags |= syscall.MS_RDONLY 764 ov.AddLowerDir(filepath.Join(dst, "upper")) 765 } 766 767 err = system.Points.AddImage(mount.PreLayerTag, src, dst, "ext3", flags, offset, size) 768 if err != nil { 769 return fmt.Errorf("while adding ext3 image: %s", err) 770 } 771 flags &^= syscall.MS_RDONLY 772 case image.SQUASHFS: 773 flags := uintptr(c.suidFlag | syscall.MS_NODEV | syscall.MS_RDONLY) 774 err = system.Points.AddImage(mount.PreLayerTag, src, dst, "squashfs", flags, offset, size) 775 if err != nil { 776 return err 777 } 778 ov.AddLowerDir(dst) 779 case image.SANDBOX: 780 if os.Geteuid() != 0 { 781 return fmt.Errorf("only root user can use sandbox as overlay") 782 } 783 784 flags := uintptr(c.suidFlag | syscall.MS_NODEV) 785 err = system.Points.AddBind(mount.PreLayerTag, imageObject.Path, dst, flags) 786 if err != nil { 787 return fmt.Errorf("while adding sandbox image: %s", err) 788 } 789 system.Points.AddRemount(mount.PreLayerTag, dst, flags) 790 791 if !imageObject.Writable { 792 if fs.IsDir(filepath.Join(imageObject.Path, "upper")) { 793 ov.AddLowerDir(filepath.Join(dst, "upper")) 794 } else { 795 ov.AddLowerDir(dst) 796 } 797 } 798 default: 799 return fmt.Errorf("unknown image format") 800 } 801 802 err = system.Points.AddPropagation(mount.DevTag, dst, syscall.MS_UNBINDABLE) 803 if err != nil { 804 return err 805 } 806 807 if imageObject.Writable && !hasUpper { 808 upper := filepath.Join(dst, "upper") 809 work := filepath.Join(dst, "work") 810 811 if err := ov.SetUpperDir(upper); err != nil { 812 return fmt.Errorf("failed to add overlay upper: %s", err) 813 } 814 if err := ov.SetWorkDir(work); err != nil { 815 return fmt.Errorf("failed to add overlay upper: %s", err) 816 } 817 818 hasUpper = true 819 } 820 } 821 822 if hasUpper { 823 if err := system.RunAfterTag(mount.PreLayerTag, c.overlayUpperWork); err != nil { 824 return err 825 } 826 } 827 828 return system.Points.AddPropagation(mount.DevTag, c.session.FinalPath(), syscall.MS_UNBINDABLE) 829 } 830 831 func (c *container) addKernelMount(system *mount.System) error { 832 var err error 833 bindFlags := uintptr(syscall.MS_BIND | syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_REC) 834 835 sylog.Debugf("Checking configuration file for 'mount proc'") 836 if c.engine.EngineConfig.File.MountProc { 837 sylog.Debugf("Adding proc to mount list\n") 838 if c.pidNS { 839 err = system.Points.AddFS(mount.KernelTag, "/proc", "proc", syscall.MS_NOSUID|syscall.MS_NODEV, "") 840 } else { 841 err = system.Points.AddBind(mount.KernelTag, "/proc", "/proc", bindFlags) 842 if err == nil { 843 if !c.userNS { 844 system.Points.AddRemount(mount.KernelTag, "/proc", bindFlags) 845 } 846 } 847 } 848 if err != nil { 849 return fmt.Errorf("unable to add proc to mount list: %s", err) 850 } 851 sylog.Verbosef("Default mount: /proc:/proc") 852 } else { 853 sylog.Verbosef("Skipping /proc mount") 854 } 855 856 sylog.Debugf("Checking configuration file for 'mount sys'") 857 if c.engine.EngineConfig.File.MountSys { 858 sylog.Debugf("Adding sysfs to mount list\n") 859 if !c.userNS { 860 err = system.Points.AddFS(mount.KernelTag, "/sys", "sysfs", syscall.MS_NOSUID|syscall.MS_NODEV, "") 861 } else { 862 err = system.Points.AddBind(mount.KernelTag, "/sys", "/sys", bindFlags) 863 if err == nil { 864 if !c.userNS { 865 system.Points.AddRemount(mount.KernelTag, "/sys", bindFlags) 866 } 867 } 868 } 869 if err != nil { 870 return fmt.Errorf("unable to add sys to mount list: %s", err) 871 } 872 sylog.Verbosef("Default mount: /sys:/sys") 873 } else { 874 sylog.Verbosef("Skipping /sys mount") 875 } 876 return nil 877 } 878 879 func (c *container) addSessionDevAt(srcpath string, atpath string, system *mount.System) error { 880 fi, err := os.Lstat(srcpath) 881 if err != nil { 882 return err 883 } 884 885 switch mode := fi.Mode(); { 886 case mode&os.ModeSymlink != 0: 887 target, err := os.Readlink(srcpath) 888 if err != nil { 889 return err 890 } 891 if err := c.session.AddSymlink(atpath, target); err != nil { 892 return fmt.Errorf("failed to create symlink %s", atpath) 893 } 894 895 dst, _ := c.session.GetPath(atpath) 896 897 sylog.Debugf("Adding symlink device %s to %s at %s", srcpath, target, dst) 898 899 return nil 900 case mode.IsDir(): 901 if err := c.session.AddDir(atpath); err != nil { 902 return fmt.Errorf("failed to add %s session dir: %s", atpath, err) 903 } 904 default: 905 if err := c.session.AddFile(atpath, nil); err != nil { 906 return fmt.Errorf("failed to add %s session file: %s", atpath, err) 907 } 908 } 909 910 dst, _ := c.session.GetPath(atpath) 911 912 sylog.Debugf("Mounting device %s at %s", srcpath, dst) 913 914 if err := system.Points.AddBind(mount.DevTag, srcpath, dst, syscall.MS_BIND); err != nil { 915 return fmt.Errorf("failed to add %s mount: %s", srcpath, err) 916 } 917 return nil 918 } 919 920 func (c *container) addSessionDev(devpath string, system *mount.System) error { 921 return c.addSessionDevAt(devpath, devpath, system) 922 } 923 924 func (c *container) addSessionDevMount(system *mount.System) error { 925 if c.devSourcePath == "" { 926 c.devSourcePath, _ = c.session.GetPath("/dev") 927 } 928 err := system.Points.AddBind(mount.DevTag, c.devSourcePath, "/dev", syscall.MS_BIND|syscall.MS_REC) 929 if err != nil { 930 return fmt.Errorf("unable to add dev to mount list: %s", err) 931 } 932 return nil 933 } 934 935 func (c *container) addDevMount(system *mount.System) error { 936 sylog.Debugf("Checking configuration file for 'mount dev'") 937 938 if c.engine.EngineConfig.File.MountDev == "minimal" || c.engine.EngineConfig.GetContain() { 939 sylog.Debugf("Creating temporary staged /dev") 940 if err := c.session.AddDir("/dev"); err != nil { 941 return fmt.Errorf("failed to add /dev session directory: %s", err) 942 } 943 sylog.Debugf("Creating temporary staged /dev/shm") 944 if err := c.session.AddDir("/dev/shm"); err != nil { 945 return fmt.Errorf("failed to add /dev/shm session directory: %s", err) 946 } 947 devshmPath, _ := c.session.GetPath("/dev/shm") 948 flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV) 949 err := system.Points.AddFS(mount.DevTag, devshmPath, c.engine.EngineConfig.File.MemoryFSType, flags, "mode=1777") 950 if err != nil { 951 return fmt.Errorf("failed to add /dev/shm temporary filesystem: %s", err) 952 } 953 954 if c.ipcNS { 955 sylog.Debugf("Creating temporary staged /dev/mqueue") 956 if err := c.session.AddDir("/dev/mqueue"); err != nil { 957 return fmt.Errorf("failed to add /dev/mqueue session directory: %s", err) 958 } 959 mqueuePath, _ := c.session.GetPath("/dev/mqueue") 960 flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV) 961 err := system.Points.AddFS(mount.DevTag, mqueuePath, "mqueue", flags, "") 962 if err != nil { 963 return fmt.Errorf("failed to add /dev/mqueue filesystem: %s", err) 964 } 965 } 966 967 if c.engine.EngineConfig.File.MountDevPts { 968 if _, err := os.Stat("/dev/pts/ptmx"); os.IsNotExist(err) { 969 return fmt.Errorf("Multiple devpts instances unsupported and /dev/pts configured") 970 } 971 972 sylog.Debugf("Creating temporary staged /dev/pts") 973 if err := c.session.AddDir("/dev/pts"); err != nil { 974 return fmt.Errorf("failed to add /dev/pts session directory: %s", err) 975 } 976 977 options := "mode=0620,newinstance,ptmxmode=0666" 978 if !c.userNS { 979 group, err := user.GetGrNam("tty") 980 if err != nil { 981 return fmt.Errorf("Problem resolving 'tty' group GID: %s", err) 982 } 983 options = fmt.Sprintf("%s,gid=%d", options, group.GID) 984 985 } else { 986 sylog.Debugf("Not setting /dev/pts filesystem gid: user namespace enabled") 987 } 988 sylog.Debugf("Mounting devpts for staged /dev/pts") 989 devptsPath, _ := c.session.GetPath("/dev/pts") 990 err = system.Points.AddFS(mount.DevTag, devptsPath, "devpts", syscall.MS_NOSUID|syscall.MS_NOEXEC, options) 991 if err != nil { 992 return fmt.Errorf("failed to add devpts filesystem: %s", err) 993 } 994 // add additional PTY allocation symlink 995 if err := c.session.AddSymlink("/dev/ptmx", "/dev/pts/ptmx"); err != nil { 996 return fmt.Errorf("failed to create /dev/ptmx symlink: %s", err) 997 } 998 999 } 1000 // add /dev/console mount pointing to original tty if there is one 1001 for fd := 0; fd <= 2; fd++ { 1002 if !terminal.IsTerminal(fd) { 1003 continue 1004 } 1005 // Found a tty on stdin, stdout, or stderr. 1006 // Bind mount it at /dev/console. 1007 // readlink() from /proc/self/fd/N isn't as reliable as 1008 // ttyname() (e.g. it doesn't work in docker), but 1009 // there is no golang ttyname() so use this for now 1010 // and also check the device that docker uses, 1011 // /dev/console. 1012 procfd := fmt.Sprintf("/proc/self/fd/%d", fd) 1013 ttylink, err := mainthread.Readlink(procfd) 1014 if err != nil { 1015 return err 1016 } 1017 1018 if _, err := os.Stat(ttylink); err != nil { 1019 // Check if in a system like docker 1020 // using /dev/console already 1021 consinfo := new(syscall.Stat_t) 1022 conserr := syscall.Stat("/dev/console", consinfo) 1023 fdinfo := new(syscall.Stat_t) 1024 fderr := syscall.Fstat(fd, fdinfo) 1025 if conserr == nil && 1026 fderr == nil && 1027 consinfo.Ino == fdinfo.Ino && 1028 consinfo.Rdev == fdinfo.Rdev { 1029 sylog.Debugf("Fd %d is tty pointing to nonexistent %s but /dev/console is good", fd, ttylink) 1030 ttylink = "/dev/console" 1031 1032 } else { 1033 sylog.Debugf("Fd %d is tty but %s doesn't exist, skipping", fd, ttylink) 1034 continue 1035 } 1036 } 1037 sylog.Debugf("Fd %d is tty %s, binding to /dev/console", fd, ttylink) 1038 if err := c.addSessionDevAt(ttylink, "/dev/console", system); err != nil { 1039 return err 1040 } 1041 // and also add a /dev/tty 1042 if err := c.addSessionDev("/dev/tty", system); err != nil { 1043 return err 1044 } 1045 break 1046 } 1047 if err := c.addSessionDev("/dev/null", system); err != nil { 1048 return err 1049 } 1050 if err := c.addSessionDev("/dev/zero", system); err != nil { 1051 return err 1052 } 1053 if err := c.addSessionDev("/dev/random", system); err != nil { 1054 return err 1055 } 1056 if err := c.addSessionDev("/dev/urandom", system); err != nil { 1057 return err 1058 } 1059 if c.engine.EngineConfig.GetNv() { 1060 files, err := ioutil.ReadDir("/dev") 1061 if err != nil { 1062 return fmt.Errorf("failed to read /dev directory: %s", err) 1063 } 1064 for _, file := range files { 1065 if strings.HasPrefix(file.Name(), "nvidia") { 1066 if err := c.addSessionDev(filepath.Join("/dev", file.Name()), system); err != nil { 1067 return err 1068 } 1069 } 1070 } 1071 } 1072 1073 if err := c.addSessionDev("/dev/fd", system); err != nil { 1074 return err 1075 } 1076 if err := c.addSessionDev("/dev/stdin", system); err != nil { 1077 return err 1078 } 1079 if err := c.addSessionDev("/dev/stdout", system); err != nil { 1080 return err 1081 } 1082 if err := c.addSessionDev("/dev/stderr", system); err != nil { 1083 return err 1084 } 1085 1086 // devices could be added in addUserbindsMount so bind session dev 1087 // after that all devices have been added to the mount point list 1088 if err := system.RunAfterTag(mount.LayerTag, c.addSessionDevMount); err != nil { 1089 return err 1090 } 1091 } else if c.engine.EngineConfig.File.MountDev == "yes" { 1092 sylog.Debugf("Adding dev to mount list\n") 1093 err := system.Points.AddBind(mount.DevTag, "/dev", "/dev", syscall.MS_BIND|syscall.MS_REC) 1094 if err != nil { 1095 return fmt.Errorf("unable to add dev to mount list: %s", err) 1096 } 1097 sylog.Verbosef("Default mount: /dev:/dev") 1098 } else if c.engine.EngineConfig.File.MountDev == "no" { 1099 sylog.Verbosef("Not mounting /dev inside the container, disallowed by configuration") 1100 } 1101 return nil 1102 } 1103 1104 func (c *container) addHostMount(system *mount.System) error { 1105 if !c.engine.EngineConfig.File.MountHostfs { 1106 sylog.Debugf("Not mounting host file systems per configuration") 1107 return nil 1108 } 1109 1110 info, err := proc.ParseMountInfo("/proc/self/mountinfo") 1111 if err != nil { 1112 return err 1113 } 1114 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1115 for _, child := range info["/"] { 1116 if strings.HasPrefix(child, "/proc") { 1117 sylog.Debugf("Skipping /proc based file system") 1118 continue 1119 } else if strings.HasPrefix(child, "/sys") { 1120 sylog.Debugf("Skipping /sys based file system") 1121 continue 1122 } else if strings.HasPrefix(child, "/dev") { 1123 sylog.Debugf("Skipping /dev based file system") 1124 continue 1125 } else if strings.HasPrefix(child, "/run") { 1126 sylog.Debugf("Skipping /run based file system") 1127 continue 1128 } else if strings.HasPrefix(child, "/boot") { 1129 sylog.Debugf("Skipping /boot based file system") 1130 continue 1131 } else if strings.HasPrefix(child, "/var") { 1132 sylog.Debugf("Skipping /var based file system") 1133 continue 1134 } 1135 sylog.Debugf("Adding %s to mount list\n", child) 1136 if err := system.Points.AddBind(mount.HostfsTag, child, child, flags); err != nil { 1137 return fmt.Errorf("unable to add %s to mount list: %s", child, err) 1138 } 1139 system.Points.AddRemount(mount.HostfsTag, child, flags) 1140 } 1141 return nil 1142 } 1143 1144 func (c *container) addBindsMount(system *mount.System) error { 1145 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1146 1147 if c.engine.EngineConfig.GetContain() { 1148 sylog.Debugf("Skipping bind mounts as contain was requested") 1149 return nil 1150 } 1151 1152 for _, bindpath := range c.engine.EngineConfig.File.BindPath { 1153 splitted := strings.Split(bindpath, ":") 1154 src := splitted[0] 1155 dst := "" 1156 if len(splitted) > 1 { 1157 dst = splitted[1] 1158 } else { 1159 dst = src 1160 } 1161 1162 sylog.Verbosef("Found 'bind path' = %s, %s", src, dst) 1163 err := system.Points.AddBind(mount.BindsTag, src, dst, flags) 1164 if err != nil { 1165 return fmt.Errorf("unable to add %s to mount list: %s", src, err) 1166 } 1167 } 1168 1169 return nil 1170 } 1171 1172 // getHomePaths returns the source and destination path of the requested home mount 1173 func (c *container) getHomePaths() (source string, dest string, err error) { 1174 if c.engine.EngineConfig.GetCustomHome() { 1175 dest = filepath.Clean(c.engine.EngineConfig.GetHomeDest()) 1176 source, err = filepath.Abs(filepath.Clean(c.engine.EngineConfig.GetHomeSource())) 1177 } else { 1178 pw, err := user.GetPwUID(uint32(os.Getuid())) 1179 if err == nil { 1180 dest = pw.Dir 1181 source = pw.Dir 1182 } 1183 } 1184 1185 return source, dest, err 1186 } 1187 1188 // addHomeStagingDir adds and mounts home directory in session staging directory 1189 func (c *container) addHomeStagingDir(system *mount.System, source string, dest string) (string, error) { 1190 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1191 homeStage := "" 1192 1193 if err := c.session.AddDir(dest); err != nil { 1194 return "", fmt.Errorf("failed to add %s as session directory: %s", source, err) 1195 } 1196 1197 homeStage, _ = c.session.GetPath(dest) 1198 1199 if !c.engine.EngineConfig.GetContain() || c.engine.EngineConfig.GetCustomHome() { 1200 sylog.Debugf("Staging home directory (%v) at %v\n", source, homeStage) 1201 1202 if err := system.Points.AddBind(mount.HomeTag, source, homeStage, flags); err != nil { 1203 return "", fmt.Errorf("unable to add %s to mount list: %s", source, err) 1204 } 1205 system.Points.AddRemount(mount.HomeTag, homeStage, flags) 1206 } else { 1207 sylog.Debugf("Using session directory for home directory") 1208 } 1209 1210 return homeStage, nil 1211 } 1212 1213 // addHomeLayer adds the home mount when using either overlay or underlay 1214 func (c *container) addHomeLayer(system *mount.System, source, dest string) error { 1215 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1216 1217 if err := system.Points.AddBind(mount.HomeTag, source, dest, flags); err != nil { 1218 return fmt.Errorf("unable to add home to mount list: %s", err) 1219 } 1220 1221 return system.Points.AddRemount(mount.HomeTag, dest, flags) 1222 } 1223 1224 // addHomeNoLayer is responsible for staging the home directory and adding the base 1225 // directory of the staged home into the container when overlay/underlay are unavailable 1226 func (c *container) addHomeNoLayer(system *mount.System, source, dest string) error { 1227 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1228 1229 homeBase := fs.RootDir(dest) 1230 if homeBase == "." { 1231 return fmt.Errorf("could not identify staged home directory base: %s", dest) 1232 } 1233 1234 homeStageBase, _ := c.session.GetPath(homeBase) 1235 sylog.Verbosef("Mounting staged home directory base (%v) into container at %v\n", homeStageBase, filepath.Join(c.session.FinalPath(), homeBase)) 1236 if err := system.Points.AddBind(mount.FinalTag, homeStageBase, homeBase, flags); err != nil { 1237 return fmt.Errorf("unable to add %s to mount list: %s", homeStageBase, err) 1238 } 1239 1240 return nil 1241 } 1242 1243 // addHomeMount is responsible for adding the home directory mount using the proper method 1244 func (c *container) addHomeMount(system *mount.System) error { 1245 if c.engine.EngineConfig.GetNoHome() { 1246 sylog.Debugf("Skipping home directory mount by user request.") 1247 return nil 1248 } 1249 1250 if !c.engine.EngineConfig.File.MountHome { 1251 sylog.Debugf("Skipping home dir mounting (per config)") 1252 return nil 1253 } 1254 1255 // check if user attempt to mount a custom home when not allowed to 1256 if c.engine.EngineConfig.GetCustomHome() && !c.engine.EngineConfig.File.UserBindControl { 1257 return fmt.Errorf("Not mounting user requested home: user bind control is disallowed") 1258 } 1259 1260 source, dest, err := c.getHomePaths() 1261 if err != nil { 1262 return fmt.Errorf("unable to get home source/destination: %v", err) 1263 } 1264 1265 stagingDir, err := c.addHomeStagingDir(system, source, dest) 1266 if err != nil { 1267 return err 1268 } 1269 1270 sylog.Debugf("Adding home directory mount [%v:%v] to list using layer: %v\n", stagingDir, dest, c.sessionLayerType) 1271 if !c.isLayerEnabled() { 1272 return c.addHomeNoLayer(system, stagingDir, dest) 1273 } 1274 return c.addHomeLayer(system, stagingDir, dest) 1275 } 1276 1277 func (c *container) addUserbindsMount(system *mount.System) error { 1278 devicesMounted := 0 1279 devPrefix := "/dev" 1280 userBindControl := c.engine.EngineConfig.File.UserBindControl 1281 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1282 1283 if len(c.engine.EngineConfig.GetBindPath()) == 0 { 1284 return nil 1285 } 1286 1287 for _, b := range c.engine.EngineConfig.GetBindPath() { 1288 splitted := strings.Split(b, ":") 1289 1290 src, err := filepath.Abs(splitted[0]) 1291 if err != nil { 1292 sylog.Warningf("Can't determine absolute path of %s bind point", splitted[0]) 1293 continue 1294 } 1295 dst := src 1296 if len(splitted) > 1 { 1297 dst = splitted[1] 1298 } 1299 if len(splitted) > 2 { 1300 if splitted[2] == "ro" { 1301 flags |= syscall.MS_RDONLY 1302 } else if splitted[2] != "rw" { 1303 sylog.Warningf("Not mounting requested %s bind point, invalid mount option %s", src, splitted[2]) 1304 } 1305 } 1306 1307 // special case for /dev mount to override default mount behaviour 1308 // with --contain option or 'mount dev = minimal' 1309 if strings.HasPrefix(src, devPrefix) { 1310 if c.engine.EngineConfig.File.MountDev == "minimal" || c.engine.EngineConfig.GetContain() { 1311 if strings.HasPrefix(src, "/dev/shm/") || strings.HasPrefix(src, "/dev/mqueue/") { 1312 sylog.Warningf("Skipping %s bind mount: not allowed", src) 1313 } else { 1314 if src != devPrefix { 1315 if err := c.addSessionDev(src, system); err != nil { 1316 sylog.Warningf("Skipping %s bind mount: %s", src, err) 1317 } 1318 } else { 1319 system.Points.RemoveByTag(mount.DevTag) 1320 c.devSourcePath = devPrefix 1321 } 1322 sylog.Debugf("Adding device %s to mount list\n", src) 1323 } 1324 devicesMounted++ 1325 } else if c.engine.EngineConfig.File.MountDev == "yes" { 1326 sylog.Warningf("Skipping %s bind mount: /dev is already mounted", src) 1327 } else { 1328 sylog.Warningf("Skipping %s bind mount: disallowed by configuration", src) 1329 } 1330 continue 1331 } else if !userBindControl { 1332 continue 1333 } 1334 1335 sylog.Debugf("Adding %s to mount list\n", src) 1336 1337 if err := system.Points.AddBind(mount.UserbindsTag, src, dst, flags); err != nil && err == mount.ErrMountExists { 1338 sylog.Warningf("destination %s already in mount list: %s", src, err) 1339 } else if err != nil { 1340 return fmt.Errorf("unable to add %s to mount list: %s", src, err) 1341 } else { 1342 system.Points.AddRemount(mount.UserbindsTag, dst, flags) 1343 flags &^= syscall.MS_RDONLY 1344 } 1345 } 1346 1347 sylog.Debugf("Checking for 'user bind control' in configuration file") 1348 if !userBindControl && devicesMounted == 0 { 1349 sylog.Warningf("Ignoring user bind request: user bind control disabled by system administrator") 1350 } 1351 1352 return nil 1353 } 1354 1355 func (c *container) addTmpMount(system *mount.System) error { 1356 sylog.Debugf("Checking for 'mount tmp' in configuration file") 1357 if !c.engine.EngineConfig.File.MountTmp { 1358 sylog.Verbosef("Skipping tmp dir mounting (per config)") 1359 return nil 1360 } 1361 tmpSource := "/tmp" 1362 vartmpSource := "/var/tmp" 1363 1364 if c.engine.EngineConfig.GetContain() { 1365 workdir := c.engine.EngineConfig.GetWorkdir() 1366 if workdir != "" { 1367 if !c.engine.EngineConfig.File.UserBindControl { 1368 sylog.Warningf("User bind control is disabled by system administrator") 1369 return nil 1370 } 1371 1372 vartmpSource = "var_tmp" 1373 1374 workdir, err := filepath.Abs(filepath.Clean(workdir)) 1375 if err != nil { 1376 sylog.Warningf("Can't determine absolute path of workdir %s", workdir) 1377 } 1378 1379 tmpSource = filepath.Join(workdir, tmpSource) 1380 vartmpSource = filepath.Join(workdir, vartmpSource) 1381 1382 if err := fs.Mkdir(tmpSource, os.ModeSticky|0777); err != nil && !os.IsExist(err) { 1383 return fmt.Errorf("failed to create %s: %s", tmpSource, err) 1384 } 1385 if err := fs.Mkdir(vartmpSource, os.ModeSticky|0777); err != nil && !os.IsExist(err) { 1386 return fmt.Errorf("failed to create %s: %s", vartmpSource, err) 1387 } 1388 } else { 1389 if _, err := c.session.GetPath(tmpSource); err != nil { 1390 if err := c.session.AddDir(tmpSource); err != nil { 1391 return err 1392 } 1393 if err := c.session.Chmod(tmpSource, os.ModeSticky|0777); err != nil { 1394 return err 1395 } 1396 } 1397 if _, err := c.session.GetPath(vartmpSource); err != nil { 1398 if err := c.session.AddDir(vartmpSource); err != nil { 1399 return err 1400 } 1401 if err := c.session.Chmod(vartmpSource, os.ModeSticky|0777); err != nil { 1402 return err 1403 } 1404 } 1405 tmpSource, _ = c.session.GetPath(tmpSource) 1406 vartmpSource, _ = c.session.GetPath(vartmpSource) 1407 } 1408 } 1409 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1410 1411 if err := system.Points.AddBind(mount.TmpTag, tmpSource, "/tmp", flags); err == nil { 1412 system.Points.AddRemount(mount.TmpTag, "/tmp", flags) 1413 sylog.Verbosef("Default mount: /tmp:/tmp") 1414 } else { 1415 return fmt.Errorf("could not mount container's /tmp directory: %s %s", err, tmpSource) 1416 } 1417 if err := system.Points.AddBind(mount.TmpTag, vartmpSource, "/var/tmp", flags); err == nil { 1418 system.Points.AddRemount(mount.TmpTag, "/var/tmp", flags) 1419 sylog.Verbosef("Default mount: /var/tmp:/var/tmp") 1420 } else { 1421 return fmt.Errorf("could not mount container's /var/tmp directory: %s", err) 1422 } 1423 return nil 1424 } 1425 1426 func (c *container) addScratchMount(system *mount.System) error { 1427 hasWorkdir := false 1428 1429 scratchdir := c.engine.EngineConfig.GetScratchDir() 1430 if len(scratchdir) == 0 { 1431 sylog.Debugf("Not mounting scratch directory: Not requested") 1432 return nil 1433 } else if len(scratchdir) == 1 { 1434 scratchdir = strings.Split(filepath.Clean(scratchdir[0]), ",") 1435 } 1436 if !c.engine.EngineConfig.File.UserBindControl { 1437 sylog.Verbosef("Not mounting scratch: user bind control disabled by system administrator") 1438 return nil 1439 } 1440 workdir := c.engine.EngineConfig.GetWorkdir() 1441 sourceDir := "" 1442 if workdir != "" { 1443 hasWorkdir = true 1444 sourceDir = filepath.Clean(workdir) + "/scratch" 1445 } else { 1446 sourceDir = c.session.Path() 1447 } 1448 if hasWorkdir { 1449 if err := fs.MkdirAll(sourceDir, 0750); err != nil { 1450 return fmt.Errorf("could not create scratch working directory %s: %s", sourceDir, err) 1451 } 1452 } 1453 for _, dir := range scratchdir { 1454 fullSourceDir := "" 1455 1456 if hasWorkdir { 1457 fullSourceDir = filepath.Join(sourceDir, filepath.Base(dir)) 1458 if err := fs.MkdirAll(fullSourceDir, 0750); err != nil && !os.IsExist(err) { 1459 return fmt.Errorf("could not create scratch working directory %s: %s", sourceDir, err) 1460 } 1461 } else { 1462 src := filepath.Join("/scratch", dir) 1463 if err := c.session.AddDir(src); err != nil { 1464 return fmt.Errorf("could not create scratch working directory %s: %s", sourceDir, err) 1465 } 1466 fullSourceDir, _ = c.session.GetPath(src) 1467 } 1468 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1469 if err := system.Points.AddBind(mount.ScratchTag, fullSourceDir, dir, flags); err != nil { 1470 return fmt.Errorf("could not bind scratch directory %s into container: %s", fullSourceDir, err) 1471 } 1472 system.Points.AddRemount(mount.ScratchTag, dir, flags) 1473 } 1474 return nil 1475 } 1476 1477 func (c *container) addCwdMount(system *mount.System) error { 1478 cwd := "" 1479 1480 if c.engine.EngineConfig.GetContain() { 1481 sylog.Verbosef("Not mounting current directory: container was requested") 1482 return nil 1483 } 1484 if !c.engine.EngineConfig.File.UserBindControl { 1485 sylog.Warningf("Not mounting current directory: user bind control is disabled by system administrator") 1486 return nil 1487 } 1488 if c.engine.EngineConfig.OciConfig.Process == nil { 1489 return nil 1490 } 1491 cwd = c.engine.EngineConfig.OciConfig.Process.Cwd 1492 if err := os.Chdir(cwd); err != nil { 1493 if os.IsNotExist(err) { 1494 sylog.Debugf("Container working directory %s doesn't exist, will retry after chroot", cwd) 1495 } else { 1496 sylog.Warningf("Could not set container working directory %s: %s", cwd, err) 1497 } 1498 return nil 1499 } 1500 current, err := os.Getwd() 1501 if err != nil { 1502 return fmt.Errorf("could not obtain current directory path: %s", err) 1503 } 1504 switch current { 1505 case "/", "/etc", "/bin", "/mnt", "/usr", "/var", "/opt", "/sbin", "/lib", "/lib64": 1506 sylog.Verbosef("Not mounting CWD within operating system directory: %s", current) 1507 return nil 1508 } 1509 if strings.HasPrefix(current, "/sys") || strings.HasPrefix(current, "/proc") || strings.HasPrefix(current, "/dev") { 1510 sylog.Verbosef("Not mounting CWD within virtual directory: %s", current) 1511 return nil 1512 } 1513 flags := uintptr(syscall.MS_BIND | c.suidFlag | syscall.MS_NODEV | syscall.MS_REC) 1514 if err := system.Points.AddBind(mount.CwdTag, current, cwd, flags); err == nil { 1515 system.Points.AddRemount(mount.CwdTag, cwd, flags) 1516 c.checkDest = append(c.checkDest, cwd) 1517 sylog.Verbosef("Default mount: %v: to the container", cwd) 1518 } else { 1519 sylog.Warningf("Could not bind CWD to container %s: %s", current, err) 1520 } 1521 return nil 1522 } 1523 1524 func (c *container) addLibsMount(system *mount.System) error { 1525 sylog.Debugf("Checking for 'user bind control' in configuration file") 1526 if !c.engine.EngineConfig.File.UserBindControl { 1527 sylog.Warningf("Ignoring libraries bind request: user bind control disabled by system administrator") 1528 return nil 1529 } 1530 1531 flags := uintptr(syscall.MS_BIND | syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_RDONLY | syscall.MS_REC) 1532 1533 containerDir := "/.singularity.d/libs" 1534 sessionDir := "/libs" 1535 1536 if err := c.session.AddDir(sessionDir); err != nil { 1537 return err 1538 } 1539 1540 libraries := c.engine.EngineConfig.GetLibrariesPath() 1541 1542 for _, lib := range libraries { 1543 sylog.Debugf("Add library %s to mount list", lib) 1544 1545 file := filepath.Base(lib) 1546 sessionFile := filepath.Join(sessionDir, file) 1547 1548 if err := c.session.AddFile(sessionFile, []byte{}); err != nil { 1549 return err 1550 } 1551 1552 sessionFilePath, _ := c.session.GetPath(sessionFile) 1553 1554 err := system.Points.AddBind(mount.FilesTag, lib, sessionFilePath, flags) 1555 if err != nil { 1556 return fmt.Errorf("unable to add %s to mount list: %s", lib, err) 1557 } 1558 1559 system.Points.AddRemount(mount.FilesTag, sessionFilePath, flags) 1560 } 1561 1562 if len(libraries) > 0 { 1563 sessionDirPath, _ := c.session.GetPath(sessionDir) 1564 1565 err := system.Points.AddBind(mount.FilesTag, sessionDirPath, containerDir, flags) 1566 if err != nil { 1567 return fmt.Errorf("unable to add %s to mount list: %s", sessionDirPath, err) 1568 } 1569 1570 return system.Points.AddRemount(mount.FilesTag, containerDir, flags) 1571 } 1572 1573 return nil 1574 } 1575 1576 func (c *container) addIdentityMount(system *mount.System) error { 1577 if os.Geteuid() == 0 && c.engine.EngineConfig.GetTargetUID() == 0 { 1578 sylog.Verbosef("Not updating passwd/group files, running as root!") 1579 return nil 1580 } 1581 1582 rootfs := c.session.RootFsPath() 1583 defer c.session.Update() 1584 1585 uid := os.Getuid() 1586 if uid == 0 && c.engine.EngineConfig.GetTargetUID() != 0 { 1587 uid = c.engine.EngineConfig.GetTargetUID() 1588 } 1589 1590 if c.engine.EngineConfig.File.ConfigPasswd { 1591 passwd := filepath.Join(rootfs, "/etc/passwd") 1592 _, home, err := c.getHomePaths() 1593 if err != nil { 1594 sylog.Warningf("%s", err) 1595 } else { 1596 content, err := files.Passwd(passwd, home, uid) 1597 if err != nil { 1598 sylog.Warningf("%s", err) 1599 } else { 1600 if err := c.session.AddFile("/etc/passwd", content); err != nil { 1601 sylog.Warningf("failed to add passwd session file: %s", err) 1602 } 1603 passwd, _ = c.session.GetPath("/etc/passwd") 1604 1605 sylog.Debugf("Adding /etc/passwd to mount list\n") 1606 err = system.Points.AddBind(mount.FilesTag, passwd, "/etc/passwd", syscall.MS_BIND) 1607 if err != nil { 1608 return fmt.Errorf("unable to add /etc/passwd to mount list: %s", err) 1609 } 1610 sylog.Verbosef("Default mount: /etc/passwd:/etc/passwd") 1611 } 1612 } 1613 } else { 1614 sylog.Verbosef("Skipping bind of the host's /etc/passwd") 1615 } 1616 1617 if c.engine.EngineConfig.File.ConfigGroup { 1618 group := filepath.Join(rootfs, "/etc/group") 1619 content, err := files.Group(group, uid, c.engine.EngineConfig.GetTargetGID()) 1620 if err != nil { 1621 sylog.Warningf("%s", err) 1622 } else { 1623 if err := c.session.AddFile("/etc/group", content); err != nil { 1624 sylog.Warningf("failed to add group session file: %s", err) 1625 } 1626 group, _ = c.session.GetPath("/etc/group") 1627 1628 sylog.Debugf("Adding /etc/group to mount list\n") 1629 err = system.Points.AddBind(mount.FilesTag, group, "/etc/group", syscall.MS_BIND) 1630 if err != nil { 1631 return fmt.Errorf("unable to add /etc/group to mount list: %s", err) 1632 } 1633 sylog.Verbosef("Default mount: /etc/group:/etc/group") 1634 } 1635 } else { 1636 sylog.Verbosef("Skipping bind of the host's /etc/group") 1637 } 1638 1639 return nil 1640 } 1641 1642 func (c *container) addResolvConfMount(system *mount.System) error { 1643 resolvConf := "/etc/resolv.conf" 1644 1645 if c.engine.EngineConfig.File.ConfigResolvConf { 1646 var err error 1647 var content []byte 1648 1649 dns := c.engine.EngineConfig.GetDNS() 1650 1651 if dns == "" { 1652 r, err := os.Open(resolvConf) 1653 if err != nil { 1654 return err 1655 } 1656 content, err = ioutil.ReadAll(r) 1657 if err != nil { 1658 return err 1659 } 1660 } else { 1661 dns = strings.Replace(dns, " ", "", -1) 1662 content, err = files.ResolvConf(strings.Split(dns, ",")) 1663 if err != nil { 1664 return err 1665 } 1666 } 1667 if err := c.session.AddFile(resolvConf, content); err != nil { 1668 sylog.Warningf("failed to add resolv.conf session file: %s", err) 1669 } 1670 sessionFile, _ := c.session.GetPath(resolvConf) 1671 1672 sylog.Debugf("Adding %s to mount list\n", resolvConf) 1673 err = system.Points.AddBind(mount.FilesTag, sessionFile, resolvConf, syscall.MS_BIND) 1674 if err != nil { 1675 return fmt.Errorf("unable to add %s to mount list: %s", resolvConf, err) 1676 } 1677 sylog.Verbosef("Default mount: /etc/resolv.conf:/etc/resolv.conf") 1678 } else { 1679 sylog.Verbosef("Skipping bind of the host's %s", resolvConf) 1680 } 1681 return nil 1682 } 1683 1684 func (c *container) addHostnameMount(system *mount.System) error { 1685 hostnameFile := "/etc/hostname" 1686 1687 if c.utsNS { 1688 hostname := c.engine.EngineConfig.GetHostname() 1689 if hostname != "" { 1690 sylog.Debugf("Set container hostname %s", hostname) 1691 1692 content, err := files.Hostname(hostname) 1693 if err != nil { 1694 return fmt.Errorf("unable to add %s to hostname file: %s", hostname, err) 1695 } 1696 if err := c.session.AddFile(hostnameFile, content); err != nil { 1697 return fmt.Errorf("failed to add hostname session file: %s", err) 1698 } 1699 sessionFile, _ := c.session.GetPath(hostnameFile) 1700 1701 sylog.Debugf("Adding %s to mount list\n", hostnameFile) 1702 err = system.Points.AddBind(mount.FilesTag, sessionFile, hostnameFile, syscall.MS_BIND) 1703 if err != nil { 1704 return fmt.Errorf("unable to add %s to mount list: %s", hostnameFile, err) 1705 } 1706 sylog.Verbosef("Default mount: /etc/hostname:/etc/hostname") 1707 if _, err := c.rpcOps.SetHostname(hostname); err != nil { 1708 return fmt.Errorf("failed to set container hostname: %s", err) 1709 } 1710 } 1711 } else { 1712 sylog.Debugf("Skipping hostname mount, not virtualizing UTS namespace on user request") 1713 } 1714 return nil 1715 } 1716 1717 func (c *container) addActionsMount(system *mount.System) error { 1718 hostDir := filepath.Join(buildcfg.SYSCONFDIR, "/singularity/actions") 1719 containerDir := "/.singularity.d/actions" 1720 flags := uintptr(syscall.MS_BIND | syscall.MS_RDONLY | syscall.MS_NOSUID | syscall.MS_NODEV) 1721 1722 actionsDir := filepath.Join(c.session.RootFsPath(), containerDir) 1723 if !fs.IsDir(actionsDir) { 1724 sylog.Debugf("Ignoring actions mount, %s doesn't exist", actionsDir) 1725 return nil 1726 } 1727 1728 err := system.Points.AddBind(mount.BindsTag, hostDir, containerDir, flags) 1729 if err != nil { 1730 return fmt.Errorf("unable to add %s to mount list: %s", containerDir, err) 1731 } 1732 1733 return system.Points.AddRemount(mount.BindsTag, containerDir, flags) 1734 }