github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/runsc/cmd/chroot.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"os"
    21  	"path"
    22  	"path/filepath"
    23  	"regexp"
    24  
    25  	"github.com/MerlinKodo/gvisor/pkg/log"
    26  	"github.com/MerlinKodo/gvisor/runsc/cmd/util"
    27  	"github.com/MerlinKodo/gvisor/runsc/config"
    28  	"github.com/MerlinKodo/gvisor/runsc/specutils"
    29  	specs "github.com/opencontainers/runtime-spec/specs-go"
    30  	"golang.org/x/sys/unix"
    31  )
    32  
    33  // mountInChroot creates the destination mount point in the given chroot and
    34  // mounts the source.
    35  func mountInChroot(chroot, src, dst, typ string, flags uint32) error {
    36  	chrootDst := filepath.Join(chroot, dst)
    37  	log.Infof("Mounting %q at %q", src, chrootDst)
    38  
    39  	if err := specutils.SafeSetupAndMount(src, chrootDst, typ, flags, "/proc"); err != nil {
    40  		return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err)
    41  	}
    42  	return nil
    43  }
    44  
    45  func pivotRoot(root string) error {
    46  	if err := os.Chdir(root); err != nil {
    47  		return fmt.Errorf("error changing working directory: %v", err)
    48  	}
    49  	// pivot_root(new_root, put_old) moves the root filesystem (old_root)
    50  	// of the calling process to the directory put_old and makes new_root
    51  	// the new root filesystem of the calling process.
    52  	//
    53  	// pivot_root(".", ".") makes a mount of the working directory the new
    54  	// root filesystem, so it will be moved in "/" and then the old_root
    55  	// will be moved to "/" too. The parent mount of the old_root will be
    56  	// new_root, so after umounting the old_root, we will see only
    57  	// the new_root in "/".
    58  	if err := unix.PivotRoot(".", "."); err != nil {
    59  		return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err)
    60  	}
    61  
    62  	if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
    63  		return fmt.Errorf("error umounting the old root file system: %v", err)
    64  	}
    65  	return nil
    66  }
    67  
    68  func copyFile(dst, src string) error {
    69  	in, err := os.Open(src)
    70  	if err != nil {
    71  		return err
    72  	}
    73  	defer in.Close()
    74  
    75  	out, err := os.Create(dst)
    76  	if err != nil {
    77  		return err
    78  	}
    79  	defer out.Close()
    80  
    81  	_, err = out.ReadFrom(in)
    82  	return err
    83  }
    84  
    85  // setUpChroot creates an empty directory with runsc mounted at /runsc and proc
    86  // mounted at /proc.
    87  func setUpChroot(pidns bool, spec *specs.Spec, conf *config.Config, nvidiaDevMinors []uint32) error {
    88  	// We are a new mount namespace, so we can use /tmp as a directory to
    89  	// construct a new root.
    90  	chroot := os.TempDir()
    91  
    92  	log.Infof("Setting up sandbox chroot in %q", chroot)
    93  
    94  	// Convert all shared mounts into slave to be sure that nothing will be
    95  	// propagated outside of our namespace.
    96  	if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", "/proc"); err != nil {
    97  		return fmt.Errorf("error converting mounts: %v", err)
    98  	}
    99  
   100  	if err := specutils.SafeMount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil {
   101  		return fmt.Errorf("error mounting tmpfs in chroot: %v", err)
   102  	}
   103  
   104  	if err := os.Mkdir(filepath.Join(chroot, "etc"), 0755); err != nil {
   105  		return fmt.Errorf("error creating /etc in chroot: %v", err)
   106  	}
   107  
   108  	if err := copyFile(filepath.Join(chroot, "etc/localtime"), "/etc/localtime"); err != nil {
   109  		log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err)
   110  	}
   111  
   112  	if pidns {
   113  		flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
   114  		if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil {
   115  			return fmt.Errorf("error mounting proc in chroot: %v", err)
   116  		}
   117  	} else {
   118  		if err := mountInChroot(chroot, "/proc", "/proc", "bind", unix.MS_BIND|unix.MS_RDONLY|unix.MS_REC); err != nil {
   119  			return fmt.Errorf("error mounting proc in chroot: %v", err)
   120  		}
   121  	}
   122  
   123  	if err := nvproxyUpdateChroot(chroot, spec, conf, nvidiaDevMinors); err != nil {
   124  		return fmt.Errorf("error configuring chroot for Nvidia GPUs: %w", err)
   125  	}
   126  	if err := tpuProxyUpdateChroot(chroot, conf); err != nil {
   127  		return fmt.Errorf("error configuring chroot for TPU devices: %w", err)
   128  	}
   129  
   130  	if err := specutils.SafeMount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, "", "/proc"); err != nil {
   131  		return fmt.Errorf("error remounting chroot in read-only: %v", err)
   132  	}
   133  
   134  	return pivotRoot(chroot)
   135  }
   136  
   137  func tpuProxyUpdateChroot(chroot string, conf *config.Config) error {
   138  	if !conf.TPUProxy {
   139  		return nil
   140  	}
   141  	devices, err := util.EnumerateHostTPUDevices()
   142  	if err != nil {
   143  		return fmt.Errorf("enumerating TPU device files: %w", err)
   144  	}
   145  	for _, deviceNum := range devices {
   146  		devPath := fmt.Sprintf("/dev/accel%d", deviceNum)
   147  		if err := mountInChroot(chroot, devPath, devPath, "bind", unix.MS_BIND); err != nil {
   148  			return fmt.Errorf("error mounting %q in chroot: %v", devPath, err)
   149  		}
   150  		finfo, err := os.Stat(path.Join(chroot, devPath))
   151  		if err != nil {
   152  			return fmt.Errorf("error statting %q: %v", devPath, err)
   153  		}
   154  		// Ensure the file mounted in was a char device file.
   155  		if finfo.Mode()&os.ModeType != os.ModeCharDevice|os.ModeDevice {
   156  			return fmt.Errorf("unexpected file type for %q, want %s, got %s", path.Join(chroot, devPath), os.ModeCharDevice|os.ModeDevice, finfo.Mode()&os.ModeType)
   157  
   158  		}
   159  		// Multiple paths link to the /sys/devices/pci0000:00/<pci_address>
   160  		// directory that contains all relevant sysfs accel device info that we need
   161  		// bind mounted into the sandbox chroot. We can construct this path by
   162  		// reading the link below, which points to
   163  		// /sys/devices/pci0000:00/<pci_address>/accel/accel# and traversing up 2
   164  		// directories.
   165  		sysAccelPath := fmt.Sprintf("/sys/class/accel/accel%d", deviceNum)
   166  		sysAccelLink, err := os.Readlink(sysAccelPath)
   167  		if err != nil {
   168  			return fmt.Errorf("error reading %q: %v", sysAccelPath, err)
   169  		}
   170  		// Ensure the link is in the form we expect.
   171  		sysAccelLinkMatcher := regexp.MustCompile(fmt.Sprintf(`../../devices/pci0000:00/(\d+:\d+:\d+\.\d+)/accel/accel%d`, deviceNum))
   172  		if !sysAccelLinkMatcher.MatchString(sysAccelLink) {
   173  			return fmt.Errorf("unexpected link %q -> %q, link should have %q format", sysAccelPath, sysAccelLink, sysAccelLinkMatcher.String())
   174  		}
   175  		sysPCIDeviceDir, err := filepath.Abs(path.Join(filepath.Dir(sysAccelPath), sysAccelLink, "../.."))
   176  		if err != nil {
   177  			return fmt.Errorf("error parsing path %q: %v", sysAccelPath, err)
   178  		}
   179  		if err := mountInChroot(chroot, sysPCIDeviceDir, sysPCIDeviceDir, "bind", unix.MS_BIND|unix.MS_RDONLY); err != nil {
   180  			return fmt.Errorf("error mounting %q in chroot: %v", sysAccelPath, err)
   181  		}
   182  	}
   183  	return nil
   184  }
   185  
   186  func nvproxyUpdateChroot(chroot string, spec *specs.Spec, conf *config.Config, devMinors []uint32) error {
   187  	if !specutils.GPUFunctionalityRequested(spec, conf) {
   188  		return nil
   189  	}
   190  	if err := os.Mkdir(filepath.Join(chroot, "dev"), 0755); err != nil && !errors.Is(err, os.ErrExist) {
   191  		return fmt.Errorf("error creating /dev in chroot: %w", err)
   192  	}
   193  	if err := mountInChroot(chroot, "/dev/nvidiactl", "/dev/nvidiactl", "bind", unix.MS_BIND); err != nil {
   194  		return fmt.Errorf("error mounting /dev/nvidiactl in chroot: %w", err)
   195  	}
   196  	if err := mountInChroot(chroot, "/dev/nvidia-uvm", "/dev/nvidia-uvm", "bind", unix.MS_BIND); err != nil {
   197  		return fmt.Errorf("error mounting /dev/nvidia-uvm in chroot: %w", err)
   198  	}
   199  	for _, devMinor := range devMinors {
   200  		path := fmt.Sprintf("/dev/nvidia%d", devMinor)
   201  		if err := mountInChroot(chroot, path, path, "bind", unix.MS_BIND); err != nil {
   202  			return fmt.Errorf("error mounting %q in chroot: %v", path, err)
   203  		}
   204  	}
   205  	return nil
   206  }