gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/runsc/cmd/chroot.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cmd
    16  
    17  import (
    18  	"fmt"
    19  	"os"
    20  	"path"
    21  	"path/filepath"
    22  	"regexp"
    23  
    24  	specs "github.com/opencontainers/runtime-spec/specs-go"
    25  	"golang.org/x/sys/unix"
    26  	"gvisor.dev/gvisor/pkg/log"
    27  	"gvisor.dev/gvisor/runsc/cmd/util"
    28  	"gvisor.dev/gvisor/runsc/config"
    29  	"gvisor.dev/gvisor/runsc/specutils"
    30  )
    31  
    32  // mountInChroot creates the destination mount point in the given chroot and
    33  // mounts the source.
    34  func mountInChroot(chroot, src, dst, typ string, flags uint32) error {
    35  	chrootDst := filepath.Join(chroot, dst)
    36  	log.Infof("Mounting %q at %q", src, chrootDst)
    37  
    38  	if err := specutils.SafeSetupAndMount(src, chrootDst, typ, flags, "/proc"); err != nil {
    39  		return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err)
    40  	}
    41  	return nil
    42  }
    43  
    44  func pivotRoot(root string) error {
    45  	if err := os.Chdir(root); err != nil {
    46  		return fmt.Errorf("error changing working directory: %v", err)
    47  	}
    48  	// pivot_root(new_root, put_old) moves the root filesystem (old_root)
    49  	// of the calling process to the directory put_old and makes new_root
    50  	// the new root filesystem of the calling process.
    51  	//
    52  	// pivot_root(".", ".") makes a mount of the working directory the new
    53  	// root filesystem, so it will be moved in "/" and then the old_root
    54  	// will be moved to "/" too. The parent mount of the old_root will be
    55  	// new_root, so after umounting the old_root, we will see only
    56  	// the new_root in "/".
    57  	if err := unix.PivotRoot(".", "."); err != nil {
    58  		return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err)
    59  	}
    60  
    61  	if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
    62  		return fmt.Errorf("error umounting the old root file system: %v", err)
    63  	}
    64  	return nil
    65  }
    66  
    67  func copyFile(dst, src string) error {
    68  	in, err := os.Open(src)
    69  	if err != nil {
    70  		return err
    71  	}
    72  	defer in.Close()
    73  
    74  	out, err := os.Create(dst)
    75  	if err != nil {
    76  		return err
    77  	}
    78  	defer out.Close()
    79  
    80  	_, err = out.ReadFrom(in)
    81  	return err
    82  }
    83  
    84  // setUpChroot creates an empty directory with runsc mounted at /runsc and proc
    85  // mounted at /proc.
    86  func setUpChroot(pidns bool, spec *specs.Spec, conf *config.Config) error {
    87  	// We are a new mount namespace, so we can use /tmp as a directory to
    88  	// construct a new root.
    89  	chroot := os.TempDir()
    90  
    91  	log.Infof("Setting up sandbox chroot in %q", chroot)
    92  
    93  	// Convert all shared mounts into slave to be sure that nothing will be
    94  	// propagated outside of our namespace.
    95  	if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", "/proc"); err != nil {
    96  		return fmt.Errorf("error converting mounts: %v", err)
    97  	}
    98  
    99  	if err := specutils.SafeMount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil {
   100  		return fmt.Errorf("error mounting tmpfs in chroot: %v", err)
   101  	}
   102  
   103  	if err := os.Mkdir(filepath.Join(chroot, "etc"), 0755); err != nil {
   104  		return fmt.Errorf("error creating /etc in chroot: %v", err)
   105  	}
   106  
   107  	if err := copyFile(filepath.Join(chroot, "etc/localtime"), "/etc/localtime"); err != nil {
   108  		log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err)
   109  	}
   110  
   111  	if pidns {
   112  		flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY)
   113  		if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil {
   114  			return fmt.Errorf("error mounting proc in chroot: %v", err)
   115  		}
   116  	} else {
   117  		if err := mountInChroot(chroot, "/proc", "/proc", "bind", unix.MS_BIND|unix.MS_RDONLY|unix.MS_REC); err != nil {
   118  			return fmt.Errorf("error mounting proc in chroot: %v", err)
   119  		}
   120  	}
   121  
   122  	if err := tpuProxyUpdateChroot(chroot, spec, conf); err != nil {
   123  		return fmt.Errorf("error configuring chroot for TPU devices: %w", err)
   124  	}
   125  
   126  	if err := specutils.SafeMount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, "", "/proc"); err != nil {
   127  		return fmt.Errorf("error remounting chroot in read-only: %v", err)
   128  	}
   129  
   130  	return pivotRoot(chroot)
   131  }
   132  
   133  // Mount the path that dest points to for TPU at chroot, the mounted path is returned in absolute form.
   134  func mountTPUSyslinkInChroot(chroot, dest, relativePath string, validator func(link string) bool) (string, error) {
   135  	src, err := os.Readlink(dest)
   136  	if err != nil {
   137  		return "", fmt.Errorf("error reading %v: %v", src, err)
   138  	}
   139  	// Ensure the link is in the form we expect.
   140  	if !validator(src) {
   141  		return "", fmt.Errorf("unexpected link %q -> %q", dest, src)
   142  	}
   143  	path, err := filepath.Abs(path.Join(filepath.Dir(dest), src, relativePath))
   144  	if err != nil {
   145  		return "", fmt.Errorf("error parsing path %q: %v", src, err)
   146  	}
   147  	if err := mountInChroot(chroot, path, path, "bind", unix.MS_BIND|unix.MS_RDONLY); err != nil {
   148  		return "", fmt.Errorf("error mounting %q in chroot: %v", dest, err)
   149  	}
   150  	return path, nil
   151  }
   152  
   153  func mountTPUDeviceInfoInChroot(chroot, devicePath, sysfsFormat, pciDeviceFormat string) error {
   154  	deviceNum, valid, err := util.ExtractTpuDeviceMinor(devicePath)
   155  	if err != nil {
   156  		return fmt.Errorf("extracting TPU device minor: %w", err)
   157  	}
   158  	if !valid {
   159  		return nil
   160  	}
   161  	// Multiple paths link to the /sys/devices/pci0000:00/<pci_address>
   162  	// directory that contains all relevant sysfs accel/vfio device info that we need
   163  	// bind mounted into the sandbox chroot. We can construct this path by
   164  	// reading the link below, which points to
   165  	//   * /sys/devices/pci0000:00/<pci_address>/accel/accel#
   166  	//   * or /sys/devices/pci0000:00/<pci_address>/vfio-dev/vfio# for VFIO-based TPU
   167  	// and traversing up 2 directories.
   168  	// The sysDevicePath itself is a soft link to the deivce directory.
   169  	sysDevicePath := fmt.Sprintf(sysfsFormat, deviceNum)
   170  	sysPCIDeviceDir, err := mountTPUSyslinkInChroot(chroot, sysDevicePath, "../..", func(link string) bool {
   171  		sysDeviceLinkMatcher := regexp.MustCompile(fmt.Sprintf(pciDeviceFormat, deviceNum))
   172  		return sysDeviceLinkMatcher.MatchString(link)
   173  	})
   174  	if err != nil {
   175  		return err
   176  	}
   177  
   178  	// Mount the device's IOMMU group if available.
   179  	iommuGroupPath := path.Join(sysPCIDeviceDir, "iommu_group")
   180  	if _, err := os.Stat(iommuGroupPath); err == nil {
   181  		if _, err := mountTPUSyslinkInChroot(chroot, iommuGroupPath, "", func(link string) bool {
   182  			return fmt.Sprintf("../../../kernel/iommu_groups/%d", deviceNum) == link
   183  		}); err != nil {
   184  			return err
   185  		}
   186  	}
   187  	return nil
   188  }
   189  
   190  func tpuProxyUpdateChroot(chroot string, spec *specs.Spec, conf *config.Config) error {
   191  	if !specutils.TPUProxyIsEnabled(spec, conf) {
   192  		return nil
   193  	}
   194  	// When a path glob is added to pathGlobToSysfsFormat, the corresponding pciDeviceFormat has to be added to pathGlobToPciDeviceFormat.
   195  	pathGlobToSysfsFormat := map[string]string{
   196  		"/dev/accel*": "/sys/class/accel/accel%d",
   197  		"/dev/vfio/*": "/sys/class/vfio-dev/vfio%d"}
   198  	pathGlobToPciDeviceFormat := map[string]string{
   199  		"/dev/accel*": `../../devices/pci0000:00/(\d+:\d+:\d+\.\d+)/accel/accel%d`,
   200  		"/dev/vfio/*": `../../devices/pci0000:00/(\d+:\d+:\d+\.\d+)/vfio-dev/vfio%d`}
   201  	// Bind mount device info directories for all TPU devices on the host.
   202  	// For v4 TPU, the directory /sys/devices/pci0000:00/<pci_address>/accel/accel# is mounted;
   203  	// For v5e TPU, the directory /sys/devices/pci0000:00/<pci_address>/vfio-dev/vfio# is mounted.
   204  	for pathGlob, sysfsFormat := range pathGlobToSysfsFormat {
   205  		paths, err := filepath.Glob(pathGlob)
   206  		if err != nil {
   207  			return fmt.Errorf("enumerating TPU device files: %w", err)
   208  		}
   209  		for _, devPath := range paths {
   210  			if err := mountTPUDeviceInfoInChroot(chroot, devPath, sysfsFormat, pathGlobToPciDeviceFormat[pathGlob]); err != nil {
   211  				return err
   212  			}
   213  		}
   214  	}
   215  	return nil
   216  }