github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/runsc/cmd/chroot.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cmd 16 17 import ( 18 "fmt" 19 "os" 20 "path" 21 "path/filepath" 22 "regexp" 23 24 specs "github.com/opencontainers/runtime-spec/specs-go" 25 "golang.org/x/sys/unix" 26 "github.com/metacubex/gvisor/pkg/log" 27 "github.com/metacubex/gvisor/runsc/cmd/util" 28 "github.com/metacubex/gvisor/runsc/config" 29 "github.com/metacubex/gvisor/runsc/specutils" 30 ) 31 32 // mountInChroot creates the destination mount point in the given chroot and 33 // mounts the source. 34 func mountInChroot(chroot, src, dst, typ string, flags uint32) error { 35 chrootDst := filepath.Join(chroot, dst) 36 log.Infof("Mounting %q at %q", src, chrootDst) 37 38 if err := specutils.SafeSetupAndMount(src, chrootDst, typ, flags, "/proc"); err != nil { 39 return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err) 40 } 41 return nil 42 } 43 44 func pivotRoot(root string) error { 45 if err := os.Chdir(root); err != nil { 46 return fmt.Errorf("error changing working directory: %v", err) 47 } 48 // pivot_root(new_root, put_old) moves the root filesystem (old_root) 49 // of the calling process to the directory put_old and makes new_root 50 // the new root filesystem of the calling process. 51 // 52 // pivot_root(".", ".") makes a mount of the working directory the new 53 // root filesystem, so it will be moved in "/" and then the old_root 54 // will be moved to "/" too. The parent mount of the old_root will be 55 // new_root, so after umounting the old_root, we will see only 56 // the new_root in "/". 57 if err := unix.PivotRoot(".", "."); err != nil { 58 return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err) 59 } 60 61 if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { 62 return fmt.Errorf("error umounting the old root file system: %v", err) 63 } 64 return nil 65 } 66 67 func copyFile(dst, src string) error { 68 in, err := os.Open(src) 69 if err != nil { 70 return err 71 } 72 defer in.Close() 73 74 out, err := os.Create(dst) 75 if err != nil { 76 return err 77 } 78 defer out.Close() 79 80 _, err = out.ReadFrom(in) 81 return err 82 } 83 84 // setUpChroot creates an empty directory with runsc mounted at /runsc and proc 85 // mounted at /proc. 86 func setUpChroot(pidns bool, spec *specs.Spec, conf *config.Config) error { 87 // We are a new mount namespace, so we can use /tmp as a directory to 88 // construct a new root. 89 chroot := os.TempDir() 90 91 log.Infof("Setting up sandbox chroot in %q", chroot) 92 93 // Convert all shared mounts into slave to be sure that nothing will be 94 // propagated outside of our namespace. 95 if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", "/proc"); err != nil { 96 return fmt.Errorf("error converting mounts: %v", err) 97 } 98 99 if err := specutils.SafeMount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil { 100 return fmt.Errorf("error mounting tmpfs in chroot: %v", err) 101 } 102 103 if err := os.Mkdir(filepath.Join(chroot, "etc"), 0755); err != nil { 104 return fmt.Errorf("error creating /etc in chroot: %v", err) 105 } 106 107 if err := copyFile(filepath.Join(chroot, "etc/localtime"), "/etc/localtime"); err != nil { 108 log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err) 109 } 110 111 if pidns { 112 flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY) 113 if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil { 114 return fmt.Errorf("error mounting proc in chroot: %v", err) 115 } 116 } else { 117 if err := mountInChroot(chroot, "/proc", "/proc", "bind", unix.MS_BIND|unix.MS_RDONLY|unix.MS_REC); err != nil { 118 return fmt.Errorf("error mounting proc in chroot: %v", err) 119 } 120 } 121 122 if err := tpuProxyUpdateChroot(chroot, spec, conf); err != nil { 123 return fmt.Errorf("error configuring chroot for TPU devices: %w", err) 124 } 125 126 if err := specutils.SafeMount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, "", "/proc"); err != nil { 127 return fmt.Errorf("error remounting chroot in read-only: %v", err) 128 } 129 130 return pivotRoot(chroot) 131 } 132 133 // Mount the path that dest points to for TPU at chroot, the mounted path is returned in absolute form. 134 func mountTPUSyslinkInChroot(chroot, dest, relativePath string, validator func(link string) bool) (string, error) { 135 src, err := os.Readlink(dest) 136 if err != nil { 137 return "", fmt.Errorf("error reading %v: %v", src, err) 138 } 139 // Ensure the link is in the form we expect. 140 if !validator(src) { 141 return "", fmt.Errorf("unexpected link %q -> %q", dest, src) 142 } 143 path, err := filepath.Abs(path.Join(filepath.Dir(dest), src, relativePath)) 144 if err != nil { 145 return "", fmt.Errorf("error parsing path %q: %v", src, err) 146 } 147 if err := mountInChroot(chroot, path, path, "bind", unix.MS_BIND|unix.MS_RDONLY); err != nil { 148 return "", fmt.Errorf("error mounting %q in chroot: %v", dest, err) 149 } 150 return path, nil 151 } 152 153 func mountTPUDeviceInfoInChroot(chroot, devicePath, sysfsFormat, pciDeviceFormat string) error { 154 deviceNum, valid, err := util.ExtractTpuDeviceMinor(devicePath) 155 if err != nil { 156 return fmt.Errorf("extracting TPU device minor: %w", err) 157 } 158 if !valid { 159 return nil 160 } 161 // Multiple paths link to the /sys/devices/pci0000:00/<pci_address> 162 // directory that contains all relevant sysfs accel/vfio device info that we need 163 // bind mounted into the sandbox chroot. We can construct this path by 164 // reading the link below, which points to 165 // * /sys/devices/pci0000:00/<pci_address>/accel/accel# 166 // * or /sys/devices/pci0000:00/<pci_address>/vfio-dev/vfio# for VFIO-based TPU 167 // and traversing up 2 directories. 168 // The sysDevicePath itself is a soft link to the deivce directory. 169 sysDevicePath := fmt.Sprintf(sysfsFormat, deviceNum) 170 sysPCIDeviceDir, err := mountTPUSyslinkInChroot(chroot, sysDevicePath, "../..", func(link string) bool { 171 sysDeviceLinkMatcher := regexp.MustCompile(fmt.Sprintf(pciDeviceFormat, deviceNum)) 172 return sysDeviceLinkMatcher.MatchString(link) 173 }) 174 if err != nil { 175 return err 176 } 177 178 // Mount the device's IOMMU group if available. 179 iommuGroupPath := path.Join(sysPCIDeviceDir, "iommu_group") 180 if _, err := os.Stat(iommuGroupPath); err == nil { 181 if _, err := mountTPUSyslinkInChroot(chroot, iommuGroupPath, "", func(link string) bool { 182 return fmt.Sprintf("../../../kernel/iommu_groups/%d", deviceNum) == link 183 }); err != nil { 184 return err 185 } 186 } 187 return nil 188 } 189 190 func tpuProxyUpdateChroot(chroot string, spec *specs.Spec, conf *config.Config) error { 191 if !specutils.TPUProxyIsEnabled(spec, conf) { 192 return nil 193 } 194 // When a path glob is added to pathGlobToSysfsFormat, the corresponding pciDeviceFormat has to be added to pathGlobToPciDeviceFormat. 195 pathGlobToSysfsFormat := map[string]string{ 196 "/dev/accel*": "/sys/class/accel/accel%d", 197 "/dev/vfio/*": "/sys/class/vfio-dev/vfio%d"} 198 pathGlobToPciDeviceFormat := map[string]string{ 199 "/dev/accel*": `../../devices/pci0000:00/(\d+:\d+:\d+\.\d+)/accel/accel%d`, 200 "/dev/vfio/*": `../../devices/pci0000:00/(\d+:\d+:\d+\.\d+)/vfio-dev/vfio%d`} 201 // Bind mount device info directories for all TPU devices on the host. 202 // For v4 TPU, the directory /sys/devices/pci0000:00/<pci_address>/accel/accel# is mounted; 203 // For v5e TPU, the directory /sys/devices/pci0000:00/<pci_address>/vfio-dev/vfio# is mounted. 204 for pathGlob, sysfsFormat := range pathGlobToSysfsFormat { 205 paths, err := filepath.Glob(pathGlob) 206 if err != nil { 207 return fmt.Errorf("enumerating TPU device files: %w", err) 208 } 209 for _, devPath := range paths { 210 if err := mountTPUDeviceInfoInChroot(chroot, devPath, sysfsFormat, pathGlobToPciDeviceFormat[pathGlob]); err != nil { 211 return err 212 } 213 } 214 } 215 return nil 216 }