github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/runsc/cmd/chroot.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cmd 16 17 import ( 18 "errors" 19 "fmt" 20 "os" 21 "path" 22 "path/filepath" 23 "regexp" 24 25 "github.com/MerlinKodo/gvisor/pkg/log" 26 "github.com/MerlinKodo/gvisor/runsc/cmd/util" 27 "github.com/MerlinKodo/gvisor/runsc/config" 28 "github.com/MerlinKodo/gvisor/runsc/specutils" 29 specs "github.com/opencontainers/runtime-spec/specs-go" 30 "golang.org/x/sys/unix" 31 ) 32 33 // mountInChroot creates the destination mount point in the given chroot and 34 // mounts the source. 35 func mountInChroot(chroot, src, dst, typ string, flags uint32) error { 36 chrootDst := filepath.Join(chroot, dst) 37 log.Infof("Mounting %q at %q", src, chrootDst) 38 39 if err := specutils.SafeSetupAndMount(src, chrootDst, typ, flags, "/proc"); err != nil { 40 return fmt.Errorf("error mounting %q at %q: %v", src, chrootDst, err) 41 } 42 return nil 43 } 44 45 func pivotRoot(root string) error { 46 if err := os.Chdir(root); err != nil { 47 return fmt.Errorf("error changing working directory: %v", err) 48 } 49 // pivot_root(new_root, put_old) moves the root filesystem (old_root) 50 // of the calling process to the directory put_old and makes new_root 51 // the new root filesystem of the calling process. 52 // 53 // pivot_root(".", ".") makes a mount of the working directory the new 54 // root filesystem, so it will be moved in "/" and then the old_root 55 // will be moved to "/" too. The parent mount of the old_root will be 56 // new_root, so after umounting the old_root, we will see only 57 // the new_root in "/". 58 if err := unix.PivotRoot(".", "."); err != nil { 59 return fmt.Errorf("pivot_root failed, make sure that the root mount has a parent: %v", err) 60 } 61 62 if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { 63 return fmt.Errorf("error umounting the old root file system: %v", err) 64 } 65 return nil 66 } 67 68 func copyFile(dst, src string) error { 69 in, err := os.Open(src) 70 if err != nil { 71 return err 72 } 73 defer in.Close() 74 75 out, err := os.Create(dst) 76 if err != nil { 77 return err 78 } 79 defer out.Close() 80 81 _, err = out.ReadFrom(in) 82 return err 83 } 84 85 // setUpChroot creates an empty directory with runsc mounted at /runsc and proc 86 // mounted at /proc. 87 func setUpChroot(pidns bool, spec *specs.Spec, conf *config.Config, nvidiaDevMinors []uint32) error { 88 // We are a new mount namespace, so we can use /tmp as a directory to 89 // construct a new root. 90 chroot := os.TempDir() 91 92 log.Infof("Setting up sandbox chroot in %q", chroot) 93 94 // Convert all shared mounts into slave to be sure that nothing will be 95 // propagated outside of our namespace. 96 if err := specutils.SafeMount("", "/", "", unix.MS_SLAVE|unix.MS_REC, "", "/proc"); err != nil { 97 return fmt.Errorf("error converting mounts: %v", err) 98 } 99 100 if err := specutils.SafeMount("runsc-root", chroot, "tmpfs", unix.MS_NOSUID|unix.MS_NODEV|unix.MS_NOEXEC, "", "/proc"); err != nil { 101 return fmt.Errorf("error mounting tmpfs in chroot: %v", err) 102 } 103 104 if err := os.Mkdir(filepath.Join(chroot, "etc"), 0755); err != nil { 105 return fmt.Errorf("error creating /etc in chroot: %v", err) 106 } 107 108 if err := copyFile(filepath.Join(chroot, "etc/localtime"), "/etc/localtime"); err != nil { 109 log.Warningf("Failed to copy /etc/localtime: %v. UTC timezone will be used.", err) 110 } 111 112 if pidns { 113 flags := uint32(unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC | unix.MS_RDONLY) 114 if err := mountInChroot(chroot, "proc", "/proc", "proc", flags); err != nil { 115 return fmt.Errorf("error mounting proc in chroot: %v", err) 116 } 117 } else { 118 if err := mountInChroot(chroot, "/proc", "/proc", "bind", unix.MS_BIND|unix.MS_RDONLY|unix.MS_REC); err != nil { 119 return fmt.Errorf("error mounting proc in chroot: %v", err) 120 } 121 } 122 123 if err := nvproxyUpdateChroot(chroot, spec, conf, nvidiaDevMinors); err != nil { 124 return fmt.Errorf("error configuring chroot for Nvidia GPUs: %w", err) 125 } 126 if err := tpuProxyUpdateChroot(chroot, conf); err != nil { 127 return fmt.Errorf("error configuring chroot for TPU devices: %w", err) 128 } 129 130 if err := specutils.SafeMount("", chroot, "", unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_BIND, "", "/proc"); err != nil { 131 return fmt.Errorf("error remounting chroot in read-only: %v", err) 132 } 133 134 return pivotRoot(chroot) 135 } 136 137 func tpuProxyUpdateChroot(chroot string, conf *config.Config) error { 138 if !conf.TPUProxy { 139 return nil 140 } 141 devices, err := util.EnumerateHostTPUDevices() 142 if err != nil { 143 return fmt.Errorf("enumerating TPU device files: %w", err) 144 } 145 for _, deviceNum := range devices { 146 devPath := fmt.Sprintf("/dev/accel%d", deviceNum) 147 if err := mountInChroot(chroot, devPath, devPath, "bind", unix.MS_BIND); err != nil { 148 return fmt.Errorf("error mounting %q in chroot: %v", devPath, err) 149 } 150 finfo, err := os.Stat(path.Join(chroot, devPath)) 151 if err != nil { 152 return fmt.Errorf("error statting %q: %v", devPath, err) 153 } 154 // Ensure the file mounted in was a char device file. 155 if finfo.Mode()&os.ModeType != os.ModeCharDevice|os.ModeDevice { 156 return fmt.Errorf("unexpected file type for %q, want %s, got %s", path.Join(chroot, devPath), os.ModeCharDevice|os.ModeDevice, finfo.Mode()&os.ModeType) 157 158 } 159 // Multiple paths link to the /sys/devices/pci0000:00/<pci_address> 160 // directory that contains all relevant sysfs accel device info that we need 161 // bind mounted into the sandbox chroot. We can construct this path by 162 // reading the link below, which points to 163 // /sys/devices/pci0000:00/<pci_address>/accel/accel# and traversing up 2 164 // directories. 165 sysAccelPath := fmt.Sprintf("/sys/class/accel/accel%d", deviceNum) 166 sysAccelLink, err := os.Readlink(sysAccelPath) 167 if err != nil { 168 return fmt.Errorf("error reading %q: %v", sysAccelPath, err) 169 } 170 // Ensure the link is in the form we expect. 171 sysAccelLinkMatcher := regexp.MustCompile(fmt.Sprintf(`../../devices/pci0000:00/(\d+:\d+:\d+\.\d+)/accel/accel%d`, deviceNum)) 172 if !sysAccelLinkMatcher.MatchString(sysAccelLink) { 173 return fmt.Errorf("unexpected link %q -> %q, link should have %q format", sysAccelPath, sysAccelLink, sysAccelLinkMatcher.String()) 174 } 175 sysPCIDeviceDir, err := filepath.Abs(path.Join(filepath.Dir(sysAccelPath), sysAccelLink, "../..")) 176 if err != nil { 177 return fmt.Errorf("error parsing path %q: %v", sysAccelPath, err) 178 } 179 if err := mountInChroot(chroot, sysPCIDeviceDir, sysPCIDeviceDir, "bind", unix.MS_BIND|unix.MS_RDONLY); err != nil { 180 return fmt.Errorf("error mounting %q in chroot: %v", sysAccelPath, err) 181 } 182 } 183 return nil 184 } 185 186 func nvproxyUpdateChroot(chroot string, spec *specs.Spec, conf *config.Config, devMinors []uint32) error { 187 if !specutils.GPUFunctionalityRequested(spec, conf) { 188 return nil 189 } 190 if err := os.Mkdir(filepath.Join(chroot, "dev"), 0755); err != nil && !errors.Is(err, os.ErrExist) { 191 return fmt.Errorf("error creating /dev in chroot: %w", err) 192 } 193 if err := mountInChroot(chroot, "/dev/nvidiactl", "/dev/nvidiactl", "bind", unix.MS_BIND); err != nil { 194 return fmt.Errorf("error mounting /dev/nvidiactl in chroot: %w", err) 195 } 196 if err := mountInChroot(chroot, "/dev/nvidia-uvm", "/dev/nvidia-uvm", "bind", unix.MS_BIND); err != nil { 197 return fmt.Errorf("error mounting /dev/nvidia-uvm in chroot: %w", err) 198 } 199 for _, devMinor := range devMinors { 200 path := fmt.Sprintf("/dev/nvidia%d", devMinor) 201 if err := mountInChroot(chroot, path, path, "bind", unix.MS_BIND); err != nil { 202 return fmt.Errorf("error mounting %q in chroot: %v", path, err) 203 } 204 } 205 return nil 206 }