github.com/apptainer/singularity@v3.1.1+incompatible/internal/pkg/runtime/engines/oci/create.go (about) 1 // Copyright (c) 2018, Sylabs Inc. All rights reserved. 2 // This software is licensed under a 3-clause BSD license. Please consult the 3 // LICENSE.md file distributed with the sources of this project regarding your 4 // rights to use or distribute this software. 5 6 package oci 7 8 import ( 9 "bufio" 10 "encoding/json" 11 "fmt" 12 "net" 13 "net/rpc" 14 "os" 15 "path/filepath" 16 "strings" 17 "syscall" 18 "time" 19 20 "github.com/sylabs/singularity/pkg/ociruntime" 21 "github.com/sylabs/singularity/pkg/util/namespaces" 22 "github.com/sylabs/singularity/pkg/util/sysctl" 23 "github.com/sylabs/singularity/pkg/util/unix" 24 25 specs "github.com/opencontainers/runtime-spec/specs-go" 26 "github.com/sylabs/singularity/internal/pkg/cgroups" 27 "github.com/sylabs/singularity/internal/pkg/instance" 28 "github.com/sylabs/singularity/internal/pkg/runtime/engines/oci/rpc/client" 29 "github.com/sylabs/singularity/internal/pkg/sylog" 30 "github.com/sylabs/singularity/internal/pkg/util/fs" 31 "github.com/sylabs/singularity/internal/pkg/util/fs/mount" 32 "github.com/sylabs/singularity/pkg/util/fs/proc" 33 ) 34 35 var symlinkDevices = []struct { 36 old string 37 new string 38 }{ 39 {"/proc/self/fd", "/dev/fd"}, 40 {"/proc/kcore", "/dev/core"}, 41 {"pts/ptmx", "/dev/ptmx"}, 42 {"/proc/self/fd/0", "/dev/stdin"}, 43 {"/proc/self/fd/1", "/dev/stdout"}, 44 {"/proc/self/fd/2", "/dev/stderr"}, 45 } 46 47 type device struct { 48 major int64 49 minor int64 50 path string 51 mode os.FileMode 52 uid int 53 gid int 54 } 55 56 var devices = []device{ 57 {1, 7, "/dev/full", syscall.S_IFCHR | 0666, 0, 0}, 58 {1, 3, "/dev/null", syscall.S_IFCHR | 0666, 0, 0}, 59 {1, 8, "/dev/random", syscall.S_IFCHR | 0666, 0, 0}, 60 {5, 0, "/dev/tty", syscall.S_IFCHR | 0666, 0, 0}, 61 {1, 9, "/dev/urandom", syscall.S_IFCHR | 0666, 0, 0}, 62 {1, 5, "/dev/zero", syscall.S_IFCHR | 0666, 0, 0}, 63 } 64 65 func int64ptr(i int) *int64 { 66 t := int64(i) 67 return &t 68 } 69 70 var cgroupDevices = []specs.LinuxDeviceCgroup{ 71 { 72 Allow: true, 73 Type: "c", 74 Major: int64ptr(1), 75 Minor: int64ptr(7), 76 Access: "rw", 77 }, 78 { 79 Allow: true, 80 Type: "c", 81 Major: int64ptr(1), 82 Minor: int64ptr(3), 83 Access: "rw", 84 }, 85 { 86 Allow: true, 87 Type: "c", 88 Major: int64ptr(1), 89 Minor: int64ptr(8), 90 Access: "rw", 91 }, 92 { 93 Allow: true, 94 Type: "c", 95 Major: int64ptr(5), 96 Minor: int64ptr(0), 97 Access: "rw", 98 }, 99 { 100 Allow: true, 101 Type: "c", 102 Major: int64ptr(1), 103 Minor: int64ptr(9), 104 Access: "rw", 105 }, 106 { 107 Allow: true, 108 Type: "c", 109 Major: int64ptr(1), 110 Minor: int64ptr(5), 111 Access: "rw", 112 }, 113 { 114 Allow: true, 115 Type: "c", 116 Major: int64ptr(136), 117 Access: "rwm", 118 }, 119 { 120 Allow: true, 121 Type: "c", 122 Major: int64ptr(5), 123 Minor: int64ptr(1), 124 Access: "rw", 125 }, 126 { 127 Allow: true, 128 Type: "c", 129 Major: int64ptr(5), 130 Minor: int64ptr(2), 131 Access: "rw", 132 }, 133 } 134 135 type container struct { 136 engine *EngineOperations 137 rpcOps *client.RPC 138 rootfs string 139 rpcRoot string 140 userNS bool 141 utsNS bool 142 mntNS bool 143 devIndex int 144 cgroupIndex int 145 } 146 147 var statusChan = make(chan string, 1) 148 149 func (engine *EngineOperations) createState(pid int) error { 150 engine.EngineConfig.Lock() 151 defer engine.EngineConfig.Unlock() 152 153 name := engine.CommonConfig.ContainerID 154 155 file, err := instance.Add(name, true, instance.OciSubDir) 156 if err != nil { 157 return err 158 } 159 160 engine.EngineConfig.State.Version = specs.Version 161 engine.EngineConfig.State.Bundle = engine.EngineConfig.GetBundlePath() 162 engine.EngineConfig.State.ID = engine.CommonConfig.ContainerID 163 engine.EngineConfig.State.Pid = pid 164 engine.EngineConfig.State.Status = ociruntime.Creating 165 engine.EngineConfig.State.Annotations = engine.EngineConfig.OciConfig.Annotations 166 167 file.Config, err = json.Marshal(engine.CommonConfig) 168 if err != nil { 169 return err 170 } 171 172 file.User = "root" 173 file.Pid = pid 174 file.PPid = os.Getpid() 175 file.Image = filepath.Join(engine.EngineConfig.GetBundlePath(), engine.EngineConfig.OciConfig.Root.Path) 176 177 if err := file.Update(); err != nil { 178 return err 179 } 180 181 socketPath := engine.EngineConfig.SyncSocket 182 183 if socketPath != "" { 184 data, err := json.Marshal(engine.EngineConfig.State) 185 if err != nil { 186 sylog.Warningf("failed to marshal state data: %s", err) 187 } else if err := unix.WriteSocket(socketPath, data); err != nil { 188 sylog.Warningf("%s", err) 189 } 190 } 191 192 return nil 193 } 194 195 func (engine *EngineOperations) updateState(status string) error { 196 engine.EngineConfig.Lock() 197 defer engine.EngineConfig.Unlock() 198 199 file, err := instance.Get(engine.CommonConfig.ContainerID, instance.OciSubDir) 200 if err != nil { 201 return err 202 } 203 // do nothing if already stopped 204 if engine.EngineConfig.State.Status == ociruntime.Stopped { 205 return nil 206 } 207 oldStatus := engine.EngineConfig.State.Status 208 engine.EngineConfig.State.Status = status 209 210 t := time.Now().UnixNano() 211 212 switch status { 213 case ociruntime.Created: 214 if engine.EngineConfig.State.CreatedAt == nil { 215 engine.EngineConfig.State.CreatedAt = &t 216 } 217 case ociruntime.Running: 218 if engine.EngineConfig.State.StartedAt == nil { 219 engine.EngineConfig.State.StartedAt = &t 220 } 221 case ociruntime.Stopped: 222 if engine.EngineConfig.State.FinishedAt == nil { 223 engine.EngineConfig.State.FinishedAt = &t 224 } 225 } 226 227 file.Config, err = json.Marshal(engine.CommonConfig) 228 if err != nil { 229 return err 230 } 231 232 if err := file.Update(); err != nil { 233 return err 234 } 235 236 socketPath := engine.EngineConfig.SyncSocket 237 238 if socketPath != "" { 239 data, err := json.Marshal(engine.EngineConfig.State) 240 if err != nil { 241 sylog.Warningf("failed to marshal state data: %s", err) 242 } else if err := unix.WriteSocket(socketPath, data); err != nil { 243 sylog.Warningf("%s", err) 244 } 245 } 246 247 // send running or stopped status right after container creation 248 // to notify that container process started 249 if statusChan != nil && oldStatus == ociruntime.Created && 250 (status == ociruntime.Running || status == ociruntime.Stopped) { 251 statusChan <- status 252 } 253 return nil 254 } 255 256 // one shot function to wait on running or stopped status 257 func (engine *EngineOperations) waitStatusUpdate() { 258 if statusChan == nil { 259 return 260 } 261 // block until status update is sent 262 <-statusChan 263 // close channel and set it to nil 264 close(statusChan) 265 statusChan = nil 266 } 267 268 // CreateContainer creates a container 269 func (engine *EngineOperations) CreateContainer(pid int, rpcConn net.Conn) error { 270 var err error 271 272 if engine.CommonConfig.EngineName != Name { 273 return fmt.Errorf("engineName configuration doesn't match runtime name") 274 } 275 276 rpcOps := &client.RPC{} 277 rpcOps.Client = rpc.NewClient(rpcConn) 278 rpcOps.Name = engine.CommonConfig.EngineName 279 280 if rpcOps.Client == nil { 281 return fmt.Errorf("failed to initialize RPC client") 282 } 283 284 if err := engine.createState(pid); err != nil { 285 return err 286 } 287 288 rootfs := engine.EngineConfig.OciConfig.Root.Path 289 290 if !filepath.IsAbs(rootfs) { 291 rootfs = filepath.Join(engine.EngineConfig.GetBundlePath(), rootfs) 292 } 293 294 resolvedRootfs, err := filepath.EvalSymlinks(rootfs) 295 if err != nil { 296 return fmt.Errorf("failed to resolve %s path: %s", rootfs, err) 297 } 298 299 c := &container{ 300 engine: engine, 301 rpcOps: rpcOps, 302 rootfs: resolvedRootfs, 303 rpcRoot: fmt.Sprintf("/proc/%d/root", pid), 304 cgroupIndex: -1, 305 devIndex: -1, 306 } 307 308 for _, ns := range engine.EngineConfig.OciConfig.Linux.Namespaces { 309 switch ns.Type { 310 case specs.UserNamespace: 311 c.userNS = true 312 case specs.UTSNamespace: 313 c.utsNS = true 314 case specs.MountNamespace: 315 c.mntNS = true 316 } 317 } 318 319 p := &mount.Points{} 320 if engine.EngineConfig.OciConfig.Linux.MountLabel != "" { 321 if err := p.SetContext(engine.EngineConfig.OciConfig.Linux.MountLabel); err != nil { 322 return err 323 } 324 } 325 326 system := &mount.System{Points: p, Mount: c.mount} 327 328 for i, point := range engine.EngineConfig.OciConfig.Config.Mounts { 329 // cgroup creation 330 if point.Type == "cgroup" { 331 c.cgroupIndex = i 332 continue 333 } 334 // dev creation 335 if point.Destination == "/dev" && point.Type == "tmpfs" { 336 c.devIndex = i 337 } 338 } 339 340 if err := c.addDevices(system); err != nil { 341 return err 342 } 343 344 if err := c.addCgroups(pid, system); err != nil { 345 return err 346 } 347 348 // import OCI mount spec 349 if err := system.Points.ImportFromSpec(engine.EngineConfig.OciConfig.Config.Mounts); err != nil { 350 return err 351 } 352 353 if err := c.addRootfsMount(system); err != nil { 354 return err 355 } 356 357 if err := system.RunAfterTag(mount.KernelTag, c.addDefaultDevices); err != nil { 358 return err 359 } 360 361 if err := system.RunAfterTag(mount.KernelTag, c.addAllPaths); err != nil { 362 return err 363 } 364 365 if err := proc.SetOOMScoreAdj(pid, engine.EngineConfig.OciConfig.Process.OOMScoreAdj); err != nil { 366 return err 367 } 368 369 if err := namespaces.Enter(pid, "ipc"); err != nil { 370 return err 371 } 372 if err := namespaces.Enter(pid, "net"); err != nil { 373 return err 374 } 375 376 for key, value := range engine.EngineConfig.OciConfig.Linux.Sysctl { 377 if err := sysctl.Set(key, value); err != nil { 378 return err 379 } 380 } 381 382 if err := namespaces.Enter(os.Getpid(), "ipc"); err != nil { 383 return err 384 } 385 if err := namespaces.Enter(os.Getpid(), "net"); err != nil { 386 return err 387 } 388 389 sylog.Debugf("Mount all") 390 if err := system.MountAll(); err != nil { 391 return err 392 } 393 394 if c.utsNS && engine.EngineConfig.OciConfig.Hostname != "" { 395 if _, err := rpcOps.SetHostname(engine.EngineConfig.OciConfig.Hostname); err != nil { 396 return err 397 } 398 } 399 400 // update namespaces configuration path 401 namespaces := []struct { 402 nstype string 403 ns specs.LinuxNamespaceType 404 checkEnabled bool 405 }{ 406 {"pid", specs.PIDNamespace, false}, 407 {"uts", specs.UTSNamespace, false}, 408 {"ipc", specs.IPCNamespace, false}, 409 {"mnt", specs.MountNamespace, false}, 410 {"cgroup", specs.CgroupNamespace, false}, 411 {"net", specs.NetworkNamespace, false}, 412 {"user", specs.UserNamespace, true}, 413 } 414 415 path := fmt.Sprintf("/proc/%d/ns", pid) 416 417 for _, n := range namespaces { 418 has, err := proc.HasNamespace(pid, n.nstype) 419 if err == nil && (has || n.checkEnabled) { 420 enabled := false 421 if n.checkEnabled { 422 if engine.EngineConfig.OciConfig.Linux != nil { 423 for _, namespace := range engine.EngineConfig.OciConfig.Linux.Namespaces { 424 if n.ns == namespace.Type { 425 enabled = true 426 break 427 } 428 } 429 } 430 } 431 if has || enabled { 432 nspath := filepath.Join(path, n.nstype) 433 engine.EngineConfig.OciConfig.AddOrReplaceLinuxNamespace(string(n.ns), nspath) 434 } 435 } else if err != nil { 436 return fmt.Errorf("failed to check %s root and container namespace: %s", n.ns, err) 437 } 438 } 439 440 method := "pivot" 441 if !c.mntNS { 442 method = "chroot" 443 } 444 445 _, err = rpcOps.Chroot(c.rootfs, method) 446 if err != nil { 447 return fmt.Errorf("chroot failed: %s", err) 448 } 449 450 if engine.EngineConfig.SlavePts != -1 { 451 if err := syscall.Close(engine.EngineConfig.SlavePts); err != nil { 452 return fmt.Errorf("failed to close slave part: %s", err) 453 } 454 } 455 if engine.EngineConfig.OutputStreams[0] != -1 { 456 if err := syscall.Close(engine.EngineConfig.OutputStreams[1]); err != nil { 457 return fmt.Errorf("failed to close write output stream: %s", err) 458 } 459 } 460 if engine.EngineConfig.ErrorStreams[0] != -1 { 461 if err := syscall.Close(engine.EngineConfig.ErrorStreams[1]); err != nil { 462 return fmt.Errorf("failed to close write error stream: %s", err) 463 } 464 } 465 if engine.EngineConfig.InputStreams[0] != -1 { 466 if err := syscall.Close(engine.EngineConfig.InputStreams[1]); err != nil { 467 return fmt.Errorf("failed to close write input stream: %s", err) 468 } 469 } 470 471 return nil 472 } 473 474 func (c *container) addCgroups(pid int, system *mount.System) error { 475 name := c.engine.CommonConfig.ContainerID 476 cgroupsPath := c.engine.EngineConfig.OciConfig.Linux.CgroupsPath 477 478 if !filepath.IsAbs(cgroupsPath) { 479 if cgroupsPath == "" { 480 cgroupsPath = filepath.Join("/singularity-oci", name) 481 } else { 482 cgroupsPath = filepath.Join("/", cgroupsPath) 483 } 484 } 485 486 c.engine.EngineConfig.OciConfig.Linux.CgroupsPath = cgroupsPath 487 488 manager := &cgroups.Manager{Path: cgroupsPath, Pid: pid} 489 490 if err := manager.ApplyFromSpec(c.engine.EngineConfig.OciConfig.Linux.Resources); err != nil { 491 return fmt.Errorf("Failed to apply cgroups ressources restriction: %s", err) 492 } 493 494 if c.cgroupIndex >= 0 { 495 m := c.engine.EngineConfig.OciConfig.Config.Mounts[c.cgroupIndex] 496 c.engine.EngineConfig.OciConfig.Config.Mounts = append( 497 c.engine.EngineConfig.OciConfig.Config.Mounts[:c.cgroupIndex], 498 c.engine.EngineConfig.OciConfig.Config.Mounts[c.cgroupIndex+1:]..., 499 ) 500 501 cgroupRootPath := manager.GetCgroupRootPath() 502 if cgroupRootPath == "" { 503 return fmt.Errorf("failed to determine cgroup root path") 504 } 505 506 flags, opt := mount.ConvertOptions(m.Options) 507 options := strings.Join(opt, ",") 508 509 readOnly := false 510 if flags&syscall.MS_RDONLY != 0 { 511 readOnly = true 512 flags &^= uintptr(syscall.MS_RDONLY) 513 } 514 515 hasMode := false 516 for _, o := range opt { 517 if strings.HasPrefix(o, "mode=") { 518 hasMode = true 519 break 520 } 521 } 522 if !hasMode { 523 options += ",mode=755" 524 } 525 526 if err := system.Points.AddFS(mount.OtherTag, m.Destination, "tmpfs", flags, options); err != nil { 527 return err 528 } 529 530 createSymlinks := func(*mount.System) error { 531 cgroupPath := filepath.Join(c.rpcRoot, c.rootfs, m.Destination) 532 if _, err := os.Stat(filepath.Join(cgroupPath, "cpu")); err != nil && os.IsNotExist(err) { 533 if _, err := c.rpcOps.Symlink("cpu,cpuacct", filepath.Join(c.rootfs, m.Destination, "cpu")); err != nil { 534 return err 535 } 536 if _, err := c.rpcOps.Symlink("cpu,cpuacct", filepath.Join(c.rootfs, m.Destination, "cpuacct")); err != nil { 537 return err 538 } 539 } 540 541 if _, err := os.Stat(filepath.Join(cgroupPath, "net_cls")); err != nil && os.IsNotExist(err) { 542 if _, err := c.rpcOps.Symlink("net_cls,net_prio", filepath.Join(c.rootfs, m.Destination, "net_cls")); err != nil { 543 return err 544 } 545 if _, err := c.rpcOps.Symlink("net_cls,net_prio", filepath.Join(c.rootfs, m.Destination, "net_prio")); err != nil { 546 return err 547 } 548 } 549 return nil 550 } 551 552 if err := system.RunAfterTag(mount.OtherTag, createSymlinks); err != nil { 553 return err 554 } 555 556 f, err := os.Open(fmt.Sprintf("/proc/%d/cgroup", pid)) 557 if err != nil { 558 return err 559 } 560 defer f.Close() 561 562 flags |= uintptr(syscall.MS_BIND) 563 if readOnly { 564 flags |= syscall.MS_RDONLY 565 } 566 567 scanner := bufio.NewScanner(f) 568 for scanner.Scan() { 569 cgroupLine := strings.Split(scanner.Text(), ":") 570 if strings.HasPrefix(cgroupLine[1], "name=") { 571 cgroupLine[1] = strings.Replace(cgroupLine[1], "name=", "", 1) 572 } 573 if cgroupLine[1] != "" { 574 source := filepath.Join(cgroupRootPath, cgroupLine[1], cgroupLine[2]) 575 dest := filepath.Join(m.Destination, cgroupLine[1]) 576 if err := system.Points.AddBind(mount.OtherTag, source, dest, flags); err != nil { 577 return err 578 } 579 if readOnly { 580 if err := system.Points.AddRemount(mount.OtherTag, dest, flags); err != nil { 581 return err 582 } 583 } 584 } 585 } 586 587 if readOnly { 588 if err := system.Points.AddRemount(mount.FinalTag, m.Destination, flags); err != nil { 589 return err 590 } 591 } 592 } 593 594 c.engine.EngineConfig.Cgroups = manager 595 596 return nil 597 } 598 599 func (c *container) addAllPaths(system *mount.System) error { 600 // add masked path 601 if err := c.addMaskedPathsMount(system); err != nil { 602 return err 603 } 604 605 // add read-only path 606 if !c.userNS { 607 if err := c.addReadonlyPathsMount(system); err != nil { 608 return err 609 } 610 } 611 612 return nil 613 } 614 615 func (c *container) addRootfsMount(system *mount.System) error { 616 flags := uintptr(syscall.MS_BIND) 617 618 if c.engine.EngineConfig.OciConfig.Root.Readonly { 619 sylog.Debugf("Mounted read-only") 620 flags |= syscall.MS_RDONLY 621 } 622 623 parentRootfs, err := proc.ParentMount(c.rootfs) 624 if err != nil { 625 return err 626 } 627 628 sylog.Debugf("Parent rootfs: %s", parentRootfs) 629 630 if _, err := c.rpcOps.Mount("", parentRootfs, "", syscall.MS_PRIVATE, ""); err != nil { 631 return err 632 } 633 if err := system.Points.AddBind(mount.RootfsTag, c.rootfs, c.rootfs, flags); err != nil { 634 return err 635 } 636 if flags&syscall.MS_RDONLY != 0 { 637 return system.Points.AddRemount(mount.FinalTag, c.rootfs, flags) 638 } 639 640 return nil 641 } 642 643 func (c *container) addDefaultDevices(system *mount.System) error { 644 oldmask := syscall.Umask(0) 645 defer syscall.Umask(oldmask) 646 647 rootfsPath := filepath.Join(c.rpcRoot, c.rootfs) 648 649 devPath := filepath.Join(rootfsPath, fs.EvalRelative("/dev", rootfsPath)) 650 if _, err := os.Lstat(devPath); os.IsNotExist(err) { 651 if err := os.Mkdir(devPath, 0755); err != nil { 652 return err 653 } 654 } 655 656 for _, symlink := range symlinkDevices { 657 path := filepath.Join(rootfsPath, symlink.new) 658 if _, err := os.Lstat(path); os.IsNotExist(err) { 659 if c.userNS { 660 path = filepath.Join(c.rootfs, symlink.new) 661 if _, err := c.rpcOps.Symlink(symlink.old, path); err != nil { 662 return err 663 } 664 } else { 665 if err := os.Symlink(symlink.old, path); err != nil { 666 return err 667 } 668 } 669 } 670 } 671 672 if c.engine.EngineConfig.OciConfig.Process.Terminal { 673 path := filepath.Join(rootfsPath, "dev", "console") 674 if _, err := os.Lstat(path); os.IsNotExist(err) { 675 if c.userNS { 676 if _, err := c.rpcOps.Touch(filepath.Join(c.rootfs, "dev", "console")); err != nil { 677 return err 678 } 679 } else { 680 if err := fs.Touch(path); err != nil { 681 return err 682 } 683 } 684 path = fmt.Sprintf("/proc/self/fd/%d", c.engine.EngineConfig.SlavePts) 685 console, err := os.Readlink(path) 686 if err != nil { 687 return err 688 } 689 if err := system.Points.AddBind(mount.OtherTag, console, "/dev/console", syscall.MS_BIND); err != nil { 690 return err 691 } 692 } 693 } 694 695 for _, device := range devices { 696 dev := int((device.major << 8) | (device.minor & 0xff) | ((device.minor & 0xfff00) << 12)) 697 path := filepath.Join(rootfsPath, device.path) 698 if _, err := os.Lstat(path); os.IsNotExist(err) { 699 if c.userNS { 700 path = filepath.Join(c.rootfs, device.path) 701 if _, err := os.Stat(device.path); os.IsNotExist(err) { 702 sylog.Debugf("skipping mount, %s doesn't exists", device.path) 703 continue 704 } 705 if _, err := c.rpcOps.Touch(path); err != nil { 706 return err 707 } 708 if _, err := c.rpcOps.Mount(device.path, path, "", syscall.MS_BIND, ""); err != nil { 709 return err 710 } 711 } else { 712 if err := syscall.Mknod(path, uint32(device.mode), dev); err != nil { 713 return fmt.Errorf("mknod: %s", err) 714 } 715 if device.uid != 0 || device.gid != 0 { 716 if err := os.Chown(path, device.uid, device.gid); err != nil { 717 return err 718 } 719 } 720 } 721 } 722 } 723 724 return nil 725 } 726 727 func (c *container) addDevices(system *mount.System) error { 728 for _, d := range c.engine.EngineConfig.OciConfig.Linux.Devices { 729 var dev device 730 731 if d.Path == "" { 732 return fmt.Errorf("device path required") 733 } 734 dev.path = d.Path 735 736 if d.FileMode != nil { 737 dev.mode = *d.FileMode 738 } else { 739 dev.mode = 0644 740 } 741 742 switch d.Type { 743 case "c", "u": 744 dev.mode |= syscall.S_IFCHR 745 dev.major = d.Major 746 dev.minor = d.Minor 747 case "b": 748 dev.mode |= syscall.S_IFBLK 749 dev.major = d.Major 750 dev.minor = d.Minor 751 case "p": 752 dev.mode |= syscall.S_IFIFO 753 default: 754 return fmt.Errorf("device type unknown for %s", d.Path) 755 } 756 757 if d.UID != nil { 758 dev.uid = int(*d.UID) 759 } 760 if d.GID != nil { 761 dev.gid = int(*d.GID) 762 } 763 764 devices = append(devices, dev) 765 } 766 767 if c.devIndex >= 0 { 768 m := &c.engine.EngineConfig.OciConfig.Config.Mounts[c.devIndex] 769 770 flags, _ := mount.ConvertOptions(m.Options) 771 772 flags |= uintptr(syscall.MS_BIND) 773 if flags&syscall.MS_RDONLY != 0 { 774 if err := system.Points.AddRemount(mount.FinalTag, m.Destination, flags); err != nil { 775 return err 776 } 777 for i := len(m.Options) - 1; i >= 0; i-- { 778 if m.Options[i] == "ro" { 779 m.Options = append(m.Options[:i], m.Options[i+1:]...) 780 break 781 } 782 } 783 } 784 785 if c.engine.EngineConfig.OciConfig.Linux.Resources == nil { 786 c.engine.EngineConfig.OciConfig.Linux.Resources = &specs.LinuxResources{} 787 } 788 789 c.engine.EngineConfig.OciConfig.Linux.Resources.Devices = append(c.engine.EngineConfig.OciConfig.Linux.Resources.Devices, cgroupDevices...) 790 } 791 792 return nil 793 } 794 795 func (c *container) addMaskedPathsMount(system *mount.System) error { 796 paths := c.engine.EngineConfig.OciConfig.Linux.MaskedPaths 797 798 dir, err := instance.GetDirPrivileged(c.engine.CommonConfig.ContainerID, instance.OciSubDir) 799 if err != nil { 800 return err 801 } 802 nullPath := filepath.Join(dir, "null") 803 804 if _, err := os.Stat(nullPath); os.IsNotExist(err) { 805 oldmask := syscall.Umask(0) 806 defer syscall.Umask(oldmask) 807 808 if err := os.Mkdir(nullPath, 0755); err != nil { 809 return err 810 } 811 } 812 813 for _, path := range paths { 814 relativePath := filepath.Join(c.rootfs, path) 815 rpcPath := filepath.Join(c.rpcRoot, relativePath) 816 fi, err := os.Stat(rpcPath) 817 if err != nil { 818 sylog.Debugf("ignoring masked path %s: %s", path, err) 819 continue 820 } 821 if fi.IsDir() { 822 if err := system.Points.AddBind(mount.OtherTag, nullPath, relativePath, syscall.MS_BIND); err != nil { 823 return err 824 } 825 } else if err := system.Points.AddBind(mount.OtherTag, "/dev/null", relativePath, syscall.MS_BIND); err != nil { 826 return err 827 } 828 } 829 return nil 830 } 831 832 func (c *container) addReadonlyPathsMount(system *mount.System) error { 833 paths := c.engine.EngineConfig.OciConfig.Linux.ReadonlyPaths 834 835 for _, path := range paths { 836 relativePath := filepath.Join(c.rootfs, path) 837 rpcPath := filepath.Join(c.rpcRoot, relativePath) 838 _, err := os.Stat(rpcPath) 839 if err != nil { 840 sylog.Debugf("ignoring read-only path %s: %s", path, err) 841 continue 842 } 843 if err := system.Points.AddBind(mount.OtherTag, relativePath, relativePath, syscall.MS_BIND|syscall.MS_RDONLY); err != nil { 844 return err 845 } 846 if err := system.Points.AddRemount(mount.OtherTag, relativePath, syscall.MS_BIND|syscall.MS_RDONLY); err != nil { 847 return err 848 } 849 } 850 return nil 851 } 852 853 func (c *container) mount(point *mount.Point) error { 854 source := point.Source 855 dest := point.Destination 856 flags, opts := mount.ConvertOptions(point.Options) 857 optsString := strings.Join(opts, ",") 858 ignore := false 859 860 if flags&syscall.MS_REMOUNT != 0 { 861 ignore = true 862 } 863 864 if !strings.HasPrefix(dest, c.rootfs) { 865 rootfsPath := filepath.Join(c.rpcRoot, c.rootfs) 866 relativeDest := fs.EvalRelative(dest, c.rootfs) 867 procDest := filepath.Join(rootfsPath, relativeDest) 868 869 dest = filepath.Join(c.rootfs, relativeDest) 870 871 sylog.Debugf("Checking if %s exists", procDest) 872 if _, err := os.Stat(procDest); os.IsNotExist(err) && !ignore { 873 oldmask := syscall.Umask(0) 874 defer syscall.Umask(oldmask) 875 876 if point.Type != "" { 877 sylog.Debugf("Creating %s", procDest) 878 if c.userNS { 879 if _, err := c.rpcOps.MkdirAll(dest, 0755); err != nil { 880 return err 881 } 882 } else { 883 if err := os.MkdirAll(procDest, 0755); err != nil { 884 return err 885 } 886 } 887 } else { 888 var st syscall.Stat_t 889 890 dir := filepath.Dir(procDest) 891 if _, err := os.Stat(dir); os.IsNotExist(err) { 892 sylog.Debugf("Creating parent %s", dir) 893 if c.userNS { 894 if _, err := c.rpcOps.Mkdir(filepath.Dir(dest), 0755); err != nil { 895 return err 896 } 897 } else { 898 if err := os.MkdirAll(dir, 0755); err != nil { 899 return err 900 } 901 } 902 } 903 904 if err := syscall.Stat(source, &st); err != nil { 905 sylog.Debugf("Ignoring %s: %s", source, err) 906 return nil 907 } 908 switch st.Mode & syscall.S_IFMT { 909 case syscall.S_IFDIR: 910 sylog.Debugf("Creating dir %s", filepath.Base(procDest)) 911 if c.userNS { 912 if _, err := c.rpcOps.Mkdir(dest, 0755); err != nil { 913 return err 914 } 915 } else { 916 if err := os.Mkdir(procDest, 0755); err != nil { 917 return err 918 } 919 } 920 case syscall.S_IFREG: 921 sylog.Debugf("Creating file %s", filepath.Base(procDest)) 922 if c.userNS { 923 if _, err := c.rpcOps.Touch(dest); err != nil { 924 return err 925 } 926 } else { 927 if err := fs.Touch(procDest); err != nil { 928 return err 929 } 930 } 931 } 932 } 933 } 934 } else { 935 procDest := filepath.Join(c.rpcRoot, dest) 936 937 sylog.Debugf("Checking if %s exists", procDest) 938 if _, err := os.Stat(procDest); os.IsNotExist(err) { 939 sylog.Warningf("destination %s doesn't exist", dest) 940 return nil 941 } 942 } 943 944 if ignore { 945 sylog.Debugf("(re)mount %s", dest) 946 } else { 947 sylog.Debugf("Mount %s to %s : %s [%s]", source, dest, point.Type, optsString) 948 } 949 950 _, err := c.rpcOps.Mount(source, dest, point.Type, flags, optsString) 951 if err != nil { 952 sylog.Debugf("RPC mount error: %s", err) 953 } 954 955 return err 956 }