github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/runsc/boot/vfs.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package boot 16 17 import ( 18 "fmt" 19 "path" 20 "path/filepath" 21 "regexp" 22 "sort" 23 "strconv" 24 "strings" 25 26 specs "github.com/opencontainers/runtime-spec/specs-go" 27 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 28 "github.com/nicocha30/gvisor-ligolo/pkg/cleanup" 29 "github.com/nicocha30/gvisor-ligolo/pkg/context" 30 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 31 "github.com/nicocha30/gvisor-ligolo/pkg/fd" 32 "github.com/nicocha30/gvisor-ligolo/pkg/fspath" 33 "github.com/nicocha30/gvisor-ligolo/pkg/log" 34 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/devices/accel" 35 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/devices/memdev" 36 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/devices/nvproxy" 37 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/devices/ttydev" 38 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/devices/tundev" 39 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/cgroupfs" 40 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/devpts" 41 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/devtmpfs" 42 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/fuse" 43 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/gofer" 44 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/mqfs" 45 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/overlay" 46 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/proc" 47 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/sys" 48 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/tmpfs" 49 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/user" 50 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/inet" 51 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel" 52 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 53 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 54 "github.com/nicocha30/gvisor-ligolo/runsc/config" 55 "github.com/nicocha30/gvisor-ligolo/runsc/specutils" 56 ) 57 58 // Supported filesystems that map to different internal filesystems. 59 const ( 60 Bind = "bind" 61 Nonefs = "none" 62 ) 63 64 // SelfOverlayFilestorePrefix is the prefix in the file name of the 65 // self overlay filestore file. 66 const SelfOverlayFilestorePrefix = ".gvisor.overlay.img." 67 68 // SelfOverlayFilestorePath returns the path at which the self overlay 69 // filestore file is stored for a given mount. 70 func SelfOverlayFilestorePath(mountSrc, sandboxID string) string { 71 // We will place the filestore file in a gVisor specific hidden file inside 72 // the mount being overlay-ed itself. The same volume can be overlay-ed by 73 // multiple sandboxes. So make the filestore file unique to a sandbox by 74 // suffixing the sandbox ID. 75 return path.Join(mountSrc, selfOverlayFilestoreName(sandboxID)) 76 } 77 78 func selfOverlayFilestoreName(sandboxID string) string { 79 return SelfOverlayFilestorePrefix + sandboxID 80 } 81 82 // tmpfs has some extra supported options that we must pass through. 83 var tmpfsAllowedData = []string{"mode", "size", "uid", "gid"} 84 85 func registerFilesystems(k *kernel.Kernel, info *containerInfo) error { 86 ctx := k.SupervisorContext() 87 creds := auth.NewRootCredentials(k.RootUserNamespace()) 88 vfsObj := k.VFS() 89 90 vfsObj.MustRegisterFilesystemType(cgroupfs.Name, &cgroupfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 91 AllowUserMount: true, 92 AllowUserList: true, 93 }) 94 vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 95 AllowUserList: true, 96 // TODO(b/29356795): Users may mount this once the terminals are in a 97 // usable state. 98 AllowUserMount: false, 99 }) 100 vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 101 AllowUserMount: true, 102 AllowUserList: true, 103 }) 104 vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 105 AllowUserMount: true, 106 AllowUserList: true, 107 }) 108 vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 109 AllowUserList: true, 110 }) 111 vfsObj.MustRegisterFilesystemType(overlay.Name, &overlay.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 112 AllowUserMount: true, 113 AllowUserList: true, 114 }) 115 vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 116 AllowUserMount: true, 117 AllowUserList: true, 118 }) 119 vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 120 AllowUserMount: true, 121 AllowUserList: true, 122 }) 123 vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 124 AllowUserMount: true, 125 AllowUserList: true, 126 }) 127 vfsObj.MustRegisterFilesystemType(mqfs.Name, &mqfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 128 AllowUserMount: true, 129 AllowUserList: true, 130 }) 131 132 // Register devices. 133 if err := memdev.Register(vfsObj); err != nil { 134 return fmt.Errorf("registering memdev: %w", err) 135 } 136 if err := ttydev.Register(vfsObj); err != nil { 137 return fmt.Errorf("registering ttydev: %w", err) 138 } 139 tunSupported := tundev.IsNetTunSupported(inet.StackFromContext(ctx)) 140 if tunSupported { 141 if err := tundev.Register(vfsObj); err != nil { 142 return fmt.Errorf("registering tundev: %v", err) 143 } 144 } 145 if err := fuse.Register(vfsObj); err != nil { 146 return fmt.Errorf("registering fusedev: %w", err) 147 } 148 149 // Setup files in devtmpfs. 150 a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) 151 if err != nil { 152 return fmt.Errorf("creating devtmpfs accessor: %w", err) 153 } 154 defer a.Release(ctx) 155 156 if err := a.UserspaceInit(ctx); err != nil { 157 return fmt.Errorf("initializing userspace: %w", err) 158 } 159 if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil { 160 return fmt.Errorf("creating memdev devtmpfs files: %w", err) 161 } 162 if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil { 163 return fmt.Errorf("creating ttydev devtmpfs files: %w", err) 164 } 165 if tunSupported { 166 if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil { 167 return fmt.Errorf("creating tundev devtmpfs files: %v", err) 168 } 169 } 170 if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil { 171 return fmt.Errorf("creating fusedev devtmpfs files: %w", err) 172 } 173 174 if err := nvproxyRegisterDevicesAndCreateFiles(ctx, info, k, vfsObj, a); err != nil { 175 return err 176 } 177 178 if err := tpuProxyRegisterDevicesAndCreateFiles(ctx, info, k, vfsObj, a); err != nil { 179 return err 180 } 181 182 return nil 183 } 184 185 func setupContainerVFS(ctx context.Context, info *containerInfo, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { 186 // Create context with root credentials to mount the filesystem (the current 187 // user may not be privileged enough). 188 rootCreds := auth.NewRootCredentials(procArgs.Credentials.UserNamespace) 189 rootProcArgs := *procArgs 190 rootProcArgs.WorkingDirectory = "/" 191 rootProcArgs.Credentials = rootCreds 192 rootProcArgs.Umask = 0022 193 rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals 194 rootCtx := rootProcArgs.NewContext(mntr.k) 195 196 mns, err := mntr.mountAll(rootCtx, rootCreds, info.conf, &rootProcArgs) 197 if err != nil { 198 return fmt.Errorf("failed to setupFS: %w", err) 199 } 200 procArgs.MountNamespace = mns 201 202 mnsRoot := mns.Root() 203 mnsRoot.IncRef() 204 defer mnsRoot.DecRef(rootCtx) 205 206 if err := createDeviceFiles(rootCtx, rootCreds, info, mntr.k.VFS(), mnsRoot); err != nil { 207 return fmt.Errorf("failed to create device files: %w", err) 208 } 209 210 // We are executing a file directly. Do not resolve the executable path. 211 if procArgs.File != nil { 212 return nil 213 } 214 // Resolve the executable path from working dir and environment. 215 resolved, err := user.ResolveExecutablePath(ctx, procArgs) 216 if err != nil { 217 return err 218 } 219 procArgs.Filename = resolved 220 return nil 221 } 222 223 // compileMounts returns the supported mounts from the mount spec, adding any 224 // mandatory mounts that are required by the OCI specification. 225 // 226 // This function must NOT add/remove any gofer mounts or change their order. 227 func compileMounts(spec *specs.Spec, conf *config.Config) []specs.Mount { 228 // Keep track of whether proc and sys were mounted. 229 var procMounted, sysMounted, devMounted, devptsMounted bool 230 var mounts []specs.Mount 231 232 // Mount all submounts from the spec. 233 for _, m := range spec.Mounts { 234 // Unconditionally drop any cgroupfs mounts. If requested, we'll add our 235 // own below. 236 if m.Type == cgroupfs.Name { 237 continue 238 } 239 switch filepath.Clean(m.Destination) { 240 case "/proc": 241 procMounted = true 242 case "/sys": 243 sysMounted = true 244 case "/dev": 245 m.Type = devtmpfs.Name 246 devMounted = true 247 case "/dev/pts": 248 m.Type = devpts.Name 249 devptsMounted = true 250 } 251 mounts = append(mounts, m) 252 } 253 254 // Mount proc and sys even if the user did not ask for it, as the spec 255 // says we SHOULD. 256 var mandatoryMounts []specs.Mount 257 258 if conf.Cgroupfs { 259 mandatoryMounts = append(mandatoryMounts, specs.Mount{ 260 Type: tmpfs.Name, 261 Destination: "/sys/fs/cgroup", 262 }) 263 mandatoryMounts = append(mandatoryMounts, specs.Mount{ 264 Type: cgroupfs.Name, 265 Destination: "/sys/fs/cgroup/memory", 266 Options: []string{"memory"}, 267 }) 268 mandatoryMounts = append(mandatoryMounts, specs.Mount{ 269 Type: cgroupfs.Name, 270 Destination: "/sys/fs/cgroup/cpu", 271 Options: []string{"cpu"}, 272 }) 273 } 274 275 if !procMounted { 276 mandatoryMounts = append(mandatoryMounts, specs.Mount{ 277 Type: proc.Name, 278 Destination: "/proc", 279 }) 280 } 281 if !sysMounted { 282 mandatoryMounts = append(mandatoryMounts, specs.Mount{ 283 Type: sys.Name, 284 Destination: "/sys", 285 }) 286 } 287 if !devMounted { 288 mandatoryMounts = append(mandatoryMounts, specs.Mount{ 289 Type: devtmpfs.Name, 290 Destination: "/dev", 291 }) 292 } 293 if !devptsMounted { 294 mandatoryMounts = append(mandatoryMounts, specs.Mount{ 295 Type: devpts.Name, 296 Destination: "/dev/pts", 297 }) 298 } 299 300 // The mandatory mounts should be ordered right after the root, in case 301 // there are submounts of these mandatory mounts already in the spec. 302 mounts = append(mounts[:0], append(mandatoryMounts, mounts[0:]...)...) 303 304 return mounts 305 } 306 307 // goferMountData creates a slice of gofer mount data. 308 func goferMountData(fd int, fa config.FileAccessType, conf *config.Config) []string { 309 opts := []string{ 310 "trans=fd", 311 "rfdno=" + strconv.Itoa(fd), 312 "wfdno=" + strconv.Itoa(fd), 313 } 314 if fa == config.FileAccessShared { 315 opts = append(opts, "cache=remote_revalidating") 316 } 317 if conf.DirectFS { 318 opts = append(opts, "directfs") 319 } 320 if !conf.HostFifo.AllowOpen() { 321 opts = append(opts, "disable_fifo_open") 322 } 323 return opts 324 } 325 326 // parseAndFilterOptions parses a MountOptions slice and filters by the allowed 327 // keys. 328 func parseAndFilterOptions(opts []string, allowedKeys ...string) ([]string, error) { 329 var out []string 330 for _, o := range opts { 331 ok, err := parseMountOption(o, allowedKeys...) 332 if err != nil { 333 return nil, err 334 } 335 if ok { 336 out = append(out, o) 337 } 338 } 339 return out, nil 340 } 341 342 func parseMountOption(opt string, allowedKeys ...string) (bool, error) { 343 kv := strings.SplitN(opt, "=", 3) 344 if len(kv) > 2 { 345 return false, fmt.Errorf("invalid option %q", opt) 346 } 347 return specutils.ContainsStr(allowedKeys, kv[0]), nil 348 } 349 350 type fdDispenser struct { 351 fds []*fd.FD 352 } 353 354 func (f *fdDispenser) remove() int { 355 return f.removeAsFD().Release() 356 } 357 358 func (f *fdDispenser) removeAsFD() *fd.FD { 359 if f.empty() { 360 panic("fdDispenser out of fds") 361 } 362 rv := f.fds[0] 363 f.fds = f.fds[1:] 364 return rv 365 } 366 367 func (f *fdDispenser) empty() bool { 368 return len(f.fds) == 0 369 } 370 371 type containerMounter struct { 372 root *specs.Root 373 374 // mounts is the set of submounts for the container. It's a copy from the spec 375 // that may be freely modified without affecting the original spec. 376 mounts []specs.Mount 377 378 // fds is the list of FDs to be dispensed for mounts that require it. 379 fds fdDispenser 380 381 // overlayFilestoreFDs are the FDs to the regular files that will back the 382 // tmpfs upper mount in the overlay mounts. 383 overlayFilestoreFDs fdDispenser 384 385 // overlayMediums contains information about how the gofer mounts have been 386 // overlaid. The first entry is for rootfs and the following entries are for 387 // bind mounts in `mounts` slice above (in the same order). 388 overlayMediums []OverlayMedium 389 390 k *kernel.Kernel 391 392 hints *PodMountHints 393 394 // productName is the value to show in 395 // /sys/devices/virtual/dmi/id/product_name. 396 productName string 397 398 // sandboxID is the ID for the whole sandbox. 399 sandboxID string 400 } 401 402 func newContainerMounter(info *containerInfo, k *kernel.Kernel, hints *PodMountHints, productName string, sandboxID string) *containerMounter { 403 return &containerMounter{ 404 root: info.spec.Root, 405 mounts: compileMounts(info.spec, info.conf), 406 fds: fdDispenser{fds: info.goferFDs}, 407 overlayFilestoreFDs: fdDispenser{fds: info.overlayFilestoreFDs}, 408 overlayMediums: info.overlayMediums, 409 k: k, 410 hints: hints, 411 productName: productName, 412 sandboxID: sandboxID, 413 } 414 } 415 416 func (c *containerMounter) checkDispenser() error { 417 if !c.fds.empty() { 418 return fmt.Errorf("not all gofer FDs were consumed, remaining: %v", c.fds) 419 } 420 return nil 421 } 422 423 func (c *containerMounter) getMountAccessType(conf *config.Config, mount *specs.Mount, hint *MountHint) config.FileAccessType { 424 if hint != nil { 425 return hint.fileAccessType() 426 } 427 return conf.FileAccessMounts 428 } 429 430 func (c *containerMounter) mountAll(rootCtx context.Context, rootCreds *auth.Credentials, conf *config.Config, rootProcArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) { 431 log.Infof("Configuring container's file system") 432 433 mns, err := c.createMountNamespace(rootCtx, conf, rootCreds) 434 if err != nil { 435 return nil, fmt.Errorf("creating mount namespace: %w", err) 436 } 437 rootProcArgs.MountNamespace = mns 438 439 root := mns.Root() 440 root.IncRef() 441 defer root.DecRef(rootCtx) 442 if root.Mount().ReadOnly() { 443 // Switch to ReadWrite while we setup submounts. 444 if err := c.k.VFS().SetMountReadOnly(root.Mount(), false); err != nil { 445 return nil, fmt.Errorf(`failed to set mount at "/" readwrite: %w`, err) 446 } 447 // Restore back to ReadOnly at the end. 448 defer func() { 449 if err := c.k.VFS().SetMountReadOnly(root.Mount(), true); err != nil { 450 panic(fmt.Sprintf(`failed to restore mount at "/" back to readonly: %v`, err)) 451 } 452 }() 453 } 454 455 // Mount submounts. 456 if err := c.mountSubmounts(rootCtx, conf, mns, rootCreds); err != nil { 457 return nil, fmt.Errorf("mounting submounts: %w", err) 458 } 459 460 return mns, nil 461 } 462 463 // createMountNamespace creates the container's root mount and namespace. 464 func (c *containerMounter) createMountNamespace(ctx context.Context, conf *config.Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { 465 ioFD := c.fds.remove() 466 data := goferMountData(ioFD, conf.FileAccess, conf) 467 468 // We can't check for overlayfs here because sandbox is chroot'ed and gofer 469 // can only send mount options for specs.Mounts (specs.Root is missing 470 // Options field). So assume root is always on top of overlayfs. 471 data = append(data, "overlayfs_stale_read") 472 473 // Configure the gofer dentry cache size. 474 gofer.SetDentryCacheSize(conf.DCache) 475 476 log.Infof("Mounting root with gofer, ioFD: %d", ioFD) 477 opts := &vfs.MountOptions{ 478 ReadOnly: c.root.Readonly, 479 GetFilesystemOptions: vfs.GetFilesystemOptions{ 480 Data: strings.Join(data, ","), 481 InternalData: gofer.InternalFilesystemOptions{ 482 UniqueID: "/", 483 }, 484 }, 485 InternalMount: true, 486 } 487 488 fsName := gofer.Name 489 if c.overlayMediums[0].IsEnabled() { 490 log.Infof("Adding overlay on top of root") 491 var ( 492 err error 493 cleanup func() 494 overlayFilestore *fd.FD 495 ) 496 if c.overlayMediums[0].IsBackedByHostFile() { 497 overlayFilestore = c.overlayFilestoreFDs.removeAsFD() 498 } 499 opts, cleanup, err = c.configureOverlay(ctx, conf, creds, opts, fsName, overlayFilestore, c.overlayMediums[0]) 500 if err != nil { 501 return nil, fmt.Errorf("mounting root with overlay: %w", err) 502 } 503 defer cleanup() 504 fsName = overlay.Name 505 } 506 507 mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", fsName, opts) 508 if err != nil { 509 return nil, fmt.Errorf("setting up mount namespace: %w", err) 510 } 511 return mns, nil 512 } 513 514 // configureOverlay mounts the lower layer using "lowerOpts", mounts the upper 515 // layer using tmpfs, and return overlay mount options. "cleanup" must be called 516 // after the options have been used to mount the overlay, to release refs on 517 // lower and upper mounts. 518 func (c *containerMounter) configureOverlay(ctx context.Context, conf *config.Config, creds *auth.Credentials, lowerOpts *vfs.MountOptions, lowerFSName string, filestoreFD *fd.FD, medium OverlayMedium) (*vfs.MountOptions, func(), error) { 519 // First copy options from lower layer to upper layer and overlay. Clear 520 // filesystem specific options. 521 upperOpts := *lowerOpts 522 upperOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{} 523 524 overlayOpts := *lowerOpts 525 overlayOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{} 526 527 // All writes go to the upper layer, be paranoid and make lower readonly. 528 lowerOpts.ReadOnly = true 529 lower, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, lowerFSName, lowerOpts) 530 if err != nil { 531 return nil, nil, err 532 } 533 cu := cleanup.Make(func() { lower.DecRef(ctx) }) 534 defer cu.Clean() 535 536 // Determine the lower layer's root's type. 537 lowerRootVD := vfs.MakeVirtualDentry(lower, lower.Root()) 538 stat, err := c.k.VFS().StatAt(ctx, creds, &vfs.PathOperation{ 539 Root: lowerRootVD, 540 Start: lowerRootVD, 541 }, &vfs.StatOptions{ 542 Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE | linux.STATX_TYPE, 543 }) 544 if err != nil { 545 return nil, nil, fmt.Errorf("failed to stat lower layer's root: %v", err) 546 } 547 if stat.Mask&linux.STATX_TYPE == 0 { 548 return nil, nil, fmt.Errorf("failed to get file type of lower layer's root") 549 } 550 rootType := stat.Mode & linux.S_IFMT 551 if rootType != linux.S_IFDIR && rootType != linux.S_IFREG { 552 return nil, nil, fmt.Errorf("lower layer's root has unsupported file type %v", rootType) 553 } 554 555 // Upper is a tmpfs mount to keep all modifications inside the sandbox. 556 tmpfsOpts := tmpfs.FilesystemOpts{ 557 RootFileType: uint16(rootType), 558 FilestoreFD: filestoreFD, 559 // If a mount is being overlaid, it should not be limited by the default 560 // tmpfs size limit. 561 DisableDefaultSizeLimit: true, 562 } 563 upperOpts.GetFilesystemOptions.InternalData = tmpfsOpts 564 upper, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, tmpfs.Name, &upperOpts) 565 if err != nil { 566 return nil, nil, fmt.Errorf("failed to create upper layer for overlay, opts: %+v: %v", upperOpts, err) 567 } 568 cu.Add(func() { upper.DecRef(ctx) }) 569 570 // If the overlay mount consists of a regular file, copy up its contents 571 // from the lower layer, since in the overlay the otherwise-empty upper 572 // layer file will take precedence. 573 upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root()) 574 if rootType == linux.S_IFREG { 575 lowerFD, err := c.k.VFS().OpenAt(ctx, creds, &vfs.PathOperation{ 576 Root: lowerRootVD, 577 Start: lowerRootVD, 578 }, &vfs.OpenOptions{ 579 Flags: linux.O_RDONLY, 580 }) 581 if err != nil { 582 return nil, nil, fmt.Errorf("failed to open lower layer root for copying: %v", err) 583 } 584 defer lowerFD.DecRef(ctx) 585 upperFD, err := c.k.VFS().OpenAt(ctx, creds, &vfs.PathOperation{ 586 Root: upperRootVD, 587 Start: upperRootVD, 588 }, &vfs.OpenOptions{ 589 Flags: linux.O_WRONLY, 590 }) 591 if err != nil { 592 return nil, nil, fmt.Errorf("failed to open upper layer root for copying: %v", err) 593 } 594 defer upperFD.DecRef(ctx) 595 if _, err := vfs.CopyRegularFileData(ctx, upperFD, lowerFD); err != nil { 596 return nil, nil, fmt.Errorf("failed to copy up overlay file: %v", err) 597 } 598 } 599 600 // We need to hide the filestore from the containerized application. 601 if medium == SelfMedium { 602 if err := overlay.CreateWhiteout(ctx, c.k.VFS(), creds, &vfs.PathOperation{ 603 Root: upperRootVD, 604 Start: upperRootVD, 605 Path: fspath.Parse(selfOverlayFilestoreName(c.sandboxID)), 606 }); err != nil { 607 return nil, nil, fmt.Errorf("failed to create whiteout to hide self overlay filestore: %w", err) 608 } 609 } 610 611 // Propagate the lower layer's root's owner, group, and mode to the upper 612 // layer's root for consistency with VFS1. 613 err = c.k.VFS().SetStatAt(ctx, creds, &vfs.PathOperation{ 614 Root: upperRootVD, 615 Start: upperRootVD, 616 }, &vfs.SetStatOptions{ 617 Stat: linux.Statx{ 618 Mask: (linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE) & stat.Mask, 619 UID: stat.UID, 620 GID: stat.GID, 621 Mode: stat.Mode, 622 }, 623 }) 624 if err != nil { 625 return nil, nil, err 626 } 627 628 // Configure overlay with both layers. 629 overlayOpts.GetFilesystemOptions.InternalData = overlay.FilesystemOptions{ 630 UpperRoot: upperRootVD, 631 LowerRoots: []vfs.VirtualDentry{lowerRootVD}, 632 } 633 return &overlayOpts, cu.Release(), nil 634 } 635 636 func (c *containerMounter) mountSubmounts(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { 637 mounts, err := c.prepareMounts() 638 if err != nil { 639 return err 640 } 641 642 for i := range mounts { 643 submount := &mounts[i] 644 log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.mount.Source, submount.mount.Destination, submount.mount.Type, submount.mount.Options) 645 var ( 646 mnt *vfs.Mount 647 err error 648 ) 649 650 if submount.hint != nil && submount.hint.shouldShareMount() { 651 mnt, err = c.mountSharedSubmount(ctx, conf, mns, creds, submount.mount, submount.hint) 652 if err != nil { 653 return fmt.Errorf("mount shared mount %q to %q: %v", submount.hint.name, submount.mount.Destination, err) 654 } 655 } else { 656 mnt, err = c.mountSubmount(ctx, conf, mns, creds, submount) 657 if err != nil { 658 return fmt.Errorf("mount submount %q: %w", submount.mount.Destination, err) 659 } 660 } 661 662 if mnt != nil && mnt.ReadOnly() { 663 // Switch to ReadWrite while we setup submounts. 664 if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil { 665 return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.mount.Destination, err) 666 } 667 // Restore back to ReadOnly at the end. 668 defer func() { 669 if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil { 670 panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.mount.Destination, err)) 671 } 672 }() 673 } 674 } 675 676 if err := c.mountTmp(ctx, conf, creds, mns); err != nil { 677 return fmt.Errorf(`mount submount "\tmp": %w`, err) 678 } 679 return nil 680 } 681 682 type mountInfo struct { 683 mount *specs.Mount 684 fd int 685 hint *MountHint 686 overlayMedium OverlayMedium 687 overlayFilestoreFD *fd.FD 688 } 689 690 func newNonGoferMountInfo(mount *specs.Mount) *mountInfo { 691 return &mountInfo{mount: mount, fd: -1} 692 } 693 694 func (c *containerMounter) prepareMounts() ([]mountInfo, error) { 695 // Associate bind mounts with their FDs before sorting since there is an 696 // undocumented assumption that FDs are dispensed in the order in which 697 // they are required by mounts. 698 var mounts []mountInfo 699 goferMntIdx := 1 // First index is for rootfs. 700 for i := range c.mounts { 701 m := &c.mounts[i] 702 specutils.MaybeConvertToBindMount(m) 703 704 // Only bind mounts use host FDs; see 705 // containerMounter.getMountNameAndOptions. 706 info := mountInfo{ 707 mount: m, 708 fd: -1, 709 hint: c.hints.FindMount(m), 710 overlayMedium: NoOverlay, 711 } 712 if specutils.IsGoferMount(*m) { 713 info.fd = c.fds.remove() 714 info.overlayMedium = c.overlayMediums[goferMntIdx] 715 if info.overlayMedium.IsBackedByHostFile() { 716 info.overlayFilestoreFD = c.overlayFilestoreFDs.removeAsFD() 717 } 718 goferMntIdx++ 719 } 720 mounts = append(mounts, info) 721 } 722 if err := c.checkDispenser(); err != nil { 723 return nil, err 724 } 725 726 // Sort the mounts so that we don't place children before parents. 727 sort.Slice(mounts, func(i, j int) bool { 728 return len(mounts[i].mount.Destination) < len(mounts[j].mount.Destination) 729 }) 730 731 return mounts, nil 732 } 733 734 func (c *containerMounter) mountSubmount(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountInfo) (*vfs.Mount, error) { 735 fsName, opts, err := c.getMountNameAndOptions(conf, submount) 736 if err != nil { 737 return nil, fmt.Errorf("mountOptions failed: %w", err) 738 } 739 if len(fsName) == 0 { 740 // Filesystem is not supported (e.g. cgroup), just skip it. 741 return nil, nil 742 } 743 744 if err := c.makeMountPoint(ctx, creds, mns, submount.mount.Destination); err != nil { 745 return nil, fmt.Errorf("creating mount point %q: %w", submount.mount.Destination, err) 746 } 747 748 if submount.overlayMedium.IsEnabled() { 749 log.Infof("Adding overlay on top of mount %q", submount.mount.Destination) 750 var cleanup func() 751 opts, cleanup, err = c.configureOverlay(ctx, conf, creds, opts, fsName, submount.overlayFilestoreFD, submount.overlayMedium) 752 if err != nil { 753 return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.mount.Destination, err) 754 } 755 defer cleanup() 756 fsName = overlay.Name 757 } 758 759 root := mns.Root() 760 root.IncRef() 761 defer root.DecRef(ctx) 762 target := &vfs.PathOperation{ 763 Root: root, 764 Start: root, 765 Path: fspath.Parse(submount.mount.Destination), 766 } 767 mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts) 768 if err != nil { 769 return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.mount.Destination, submount.mount.Type, err, opts) 770 } 771 log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.mount.Source, submount.mount.Destination, submount.mount.Type, opts.GetFilesystemOptions.Data) 772 return mnt, nil 773 } 774 775 // getMountNameAndOptions retrieves the fsName, opts, and useOverlay values 776 // used for mounts. 777 func (c *containerMounter) getMountNameAndOptions(conf *config.Config, m *mountInfo) (string, *vfs.MountOptions, error) { 778 fsName := m.mount.Type 779 var ( 780 data []string 781 internalData any 782 ) 783 784 // Find filesystem name and FS specific data field. 785 switch m.mount.Type { 786 case devpts.Name, devtmpfs.Name, proc.Name: 787 // Nothing to do. 788 789 case Nonefs: 790 fsName = sys.Name 791 792 case sys.Name: 793 sysData := &sys.InternalData{EnableAccelSysfs: conf.TPUProxy} 794 if len(c.productName) > 0 { 795 sysData.ProductName = c.productName 796 } 797 internalData = sysData 798 799 case tmpfs.Name: 800 var err error 801 data, err = parseAndFilterOptions(m.mount.Options, tmpfsAllowedData...) 802 if err != nil { 803 return "", nil, err 804 } 805 806 case Bind: 807 fsName = gofer.Name 808 if m.fd < 0 { 809 // Check that an FD was provided to fails fast. 810 return "", nil, fmt.Errorf("gofer mount requires a connection FD") 811 } 812 data = goferMountData(m.fd, c.getMountAccessType(conf, m.mount, m.hint), conf) 813 internalData = gofer.InternalFilesystemOptions{ 814 UniqueID: m.mount.Destination, 815 } 816 817 case cgroupfs.Name: 818 var err error 819 data, err = parseAndFilterOptions(m.mount.Options, cgroupfs.SupportedMountOptions...) 820 if err != nil { 821 return "", nil, err 822 } 823 824 default: 825 log.Warningf("ignoring unknown filesystem type %q", m.mount.Type) 826 return "", nil, nil 827 } 828 829 opts := ParseMountOptions(m.mount.Options) 830 opts.GetFilesystemOptions = vfs.GetFilesystemOptions{ 831 Data: strings.Join(data, ","), 832 InternalData: internalData, 833 } 834 835 return fsName, opts, nil 836 } 837 838 // ParseMountOptions converts specs.Mount.Options to vfs.MountOptions. 839 func ParseMountOptions(opts []string) *vfs.MountOptions { 840 mountOpts := &vfs.MountOptions{ 841 InternalMount: true, 842 } 843 // Note: update mountHint.CheckCompatible when more options are added. 844 for _, o := range opts { 845 switch o { 846 case "ro": 847 mountOpts.ReadOnly = true 848 case "noatime": 849 mountOpts.Flags.NoATime = true 850 case "noexec": 851 mountOpts.Flags.NoExec = true 852 case "rw", "atime", "exec": 853 // These use the default value and don't need to be set. 854 case "bind", "rbind": 855 // These are the same as a mount with type="bind". 856 default: 857 log.Warningf("ignoring unknown mount option %q", o) 858 } 859 } 860 return mountOpts 861 } 862 863 func parseKeyValue(s string) (string, string, bool) { 864 tokens := strings.SplitN(s, "=", 2) 865 if len(tokens) < 2 { 866 return "", "", false 867 } 868 return strings.TrimSpace(tokens[0]), strings.TrimSpace(tokens[1]), true 869 } 870 871 // mountTmp mounts an internal tmpfs at '/tmp' if it's safe to do so. 872 // Technically we don't have to mount tmpfs at /tmp, as we could just rely on 873 // the host /tmp, but this is a nice optimization, and fixes some apps that call 874 // mknod in /tmp. It's unsafe to mount tmpfs if: 875 // 1. /tmp is mounted explicitly: we should not override user's wish 876 // 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp 877 // 878 // Note that when there are submounts inside of '/tmp', directories for the 879 // mount points must be present, making '/tmp' not empty anymore. 880 func (c *containerMounter) mountTmp(ctx context.Context, conf *config.Config, creds *auth.Credentials, mns *vfs.MountNamespace) error { 881 for _, m := range c.mounts { 882 // m.Destination has been cleaned, so it's to use equality here. 883 if m.Destination == "/tmp" { 884 log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m) 885 return nil 886 } 887 } 888 889 root := mns.Root() 890 root.IncRef() 891 defer root.DecRef(ctx) 892 pop := vfs.PathOperation{ 893 Root: root, 894 Start: root, 895 Path: fspath.Parse("/tmp"), 896 } 897 fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY}) 898 switch { 899 case err == nil: 900 defer fd.DecRef(ctx) 901 902 err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { 903 if dirent.Name != "." && dirent.Name != ".." { 904 return linuxerr.ENOTEMPTY 905 } 906 return nil 907 })) 908 switch { 909 case err == nil: 910 log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`) 911 case linuxerr.Equals(linuxerr.ENOTEMPTY, err): 912 // If more than "." and ".." is found, skip internal tmpfs to prevent 913 // hiding existing files. 914 log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`) 915 return nil 916 default: 917 return fmt.Errorf("fd.IterDirents failed: %v", err) 918 } 919 fallthrough 920 921 case linuxerr.Equals(linuxerr.ENOENT, err): 922 // No '/tmp' found (or fallthrough from above). It's safe to mount internal 923 // tmpfs. 924 tmpMount := specs.Mount{ 925 Type: tmpfs.Name, 926 Destination: "/tmp", 927 // Sticky bit is added to prevent accidental deletion of files from 928 // another user. This is normally done for /tmp. 929 Options: []string{"mode=01777"}, 930 } 931 if _, err := c.mountSubmount(ctx, conf, mns, creds, newNonGoferMountInfo(&tmpMount)); err != nil { 932 return fmt.Errorf("mountSubmount failed: %v", err) 933 } 934 return nil 935 936 case linuxerr.Equals(linuxerr.ENOTDIR, err): 937 // Not a dir?! Let it be. 938 return nil 939 940 default: 941 return fmt.Errorf(`opening "/tmp" inside container: %w`, err) 942 } 943 } 944 945 // processHints processes annotations that container hints about how volumes 946 // should be mounted (e.g. a volume shared between containers). It must be 947 // called for the root container only. 948 func (c *containerMounter) processHints(conf *config.Config, creds *auth.Credentials) error { 949 ctx := c.k.SupervisorContext() 950 for _, hint := range c.hints.mounts { 951 if !hint.shouldShareMount() { 952 continue 953 } 954 955 log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) 956 mnt, err := c.mountSharedMaster(ctx, conf, hint, creds) 957 if err != nil { 958 return fmt.Errorf("mounting shared master %q: %v", hint.name, err) 959 } 960 hint.vfsMount = mnt 961 } 962 return nil 963 } 964 965 // mountSharedMaster mounts the master of a volume that is shared among 966 // containers in a pod. 967 func (c *containerMounter) mountSharedMaster(ctx context.Context, conf *config.Config, hint *MountHint, creds *auth.Credentials) (*vfs.Mount, error) { 968 // Map mount type to filesystem name, and parse out the options that we are 969 // capable of dealing with. 970 mntInfo := newNonGoferMountInfo(&hint.mount) 971 fsName, opts, err := c.getMountNameAndOptions(conf, mntInfo) 972 if err != nil { 973 return nil, err 974 } 975 if len(fsName) == 0 { 976 return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type) 977 } 978 return c.k.VFS().MountDisconnected(ctx, creds, "", fsName, opts) 979 } 980 981 // mountSharedSubmount binds mount to a previously mounted volume that is shared 982 // among containers in the same pod. 983 func (c *containerMounter) mountSharedSubmount(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount *specs.Mount, source *MountHint) (*vfs.Mount, error) { 984 if err := source.checkCompatible(mount); err != nil { 985 return nil, err 986 } 987 988 // Ignore data and useOverlay because these were already applied to 989 // the master mount. 990 _, opts, err := c.getMountNameAndOptions(conf, newNonGoferMountInfo(mount)) 991 if err != nil { 992 return nil, err 993 } 994 newMnt := c.k.VFS().NewDisconnectedMount(source.vfsMount.Filesystem(), source.vfsMount.Root(), opts) 995 defer newMnt.DecRef(ctx) 996 997 root := mns.Root() 998 root.IncRef() 999 defer root.DecRef(ctx) 1000 target := &vfs.PathOperation{ 1001 Root: root, 1002 Start: root, 1003 Path: fspath.Parse(mount.Destination), 1004 } 1005 1006 if err := c.makeMountPoint(ctx, creds, mns, mount.Destination); err != nil { 1007 return nil, fmt.Errorf("creating mount point %q: %w", mount.Destination, err) 1008 } 1009 1010 if err := c.k.VFS().ConnectMountAt(ctx, creds, newMnt, target); err != nil { 1011 return nil, err 1012 } 1013 log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name) 1014 return newMnt, nil 1015 } 1016 1017 func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, dest string) error { 1018 root := mns.Root() 1019 root.IncRef() 1020 defer root.DecRef(ctx) 1021 target := &vfs.PathOperation{ 1022 Root: root, 1023 Start: root, 1024 Path: fspath.Parse(dest), 1025 } 1026 // First check if mount point exists. When overlay is enabled, gofer doesn't 1027 // allow changes to the FS, making MakeSytheticMountpoint() ineffective 1028 // because MkdirAt fails with EROFS even if file exists. 1029 vd, err := c.k.VFS().GetDentryAt(ctx, creds, target, &vfs.GetDentryOptions{}) 1030 if err == nil { 1031 // File exists, we're done. 1032 vd.DecRef(ctx) 1033 return nil 1034 } 1035 return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds) 1036 } 1037 1038 // configureRestore returns an updated context.Context including filesystem 1039 // state used by restore defined by conf. 1040 func (c *containerMounter) configureRestore(ctx context.Context) (context.Context, error) { 1041 fdmap := make(map[string]int) 1042 fdmap["/"] = c.fds.remove() 1043 mounts, err := c.prepareMounts() 1044 if err != nil { 1045 return ctx, err 1046 } 1047 for i := range c.mounts { 1048 submount := &mounts[i] 1049 if submount.fd >= 0 { 1050 fdmap[submount.mount.Destination] = submount.fd 1051 } 1052 } 1053 return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil 1054 } 1055 1056 func createDeviceFiles(ctx context.Context, creds *auth.Credentials, info *containerInfo, vfsObj *vfs.VirtualFilesystem, root vfs.VirtualDentry) error { 1057 if info.spec.Linux == nil { 1058 return nil 1059 } 1060 for _, dev := range info.spec.Linux.Devices { 1061 pop := vfs.PathOperation{ 1062 Root: root, 1063 Start: root, 1064 Path: fspath.Parse(dev.Path), 1065 } 1066 opts := vfs.MknodOptions{ 1067 Mode: linux.FileMode(dev.FileMode.Perm()), 1068 } 1069 // See https://github.com/opencontainers/runtime-spec/blob/main/config-linux.md#devices. 1070 switch dev.Type { 1071 case "b": 1072 opts.Mode |= linux.S_IFBLK 1073 opts.DevMajor = uint32(dev.Major) 1074 opts.DevMinor = uint32(dev.Minor) 1075 case "c", "u": 1076 opts.Mode |= linux.S_IFCHR 1077 opts.DevMajor = uint32(dev.Major) 1078 opts.DevMinor = uint32(dev.Minor) 1079 case "p": 1080 opts.Mode |= linux.S_IFIFO 1081 default: 1082 return fmt.Errorf("specified device at %q has invalid type %q", dev.Path, dev.Type) 1083 } 1084 if dev.Path == "/dev/nvidia-uvm" && info.nvidiaUVMDevMajor != 0 && opts.DevMajor != info.nvidiaUVMDevMajor { 1085 // nvidia-uvm's major device number is dynamically assigned, so the 1086 // number that it has on the host may differ from the number that 1087 // it has in sentry VFS; switch from the former to the latter. 1088 log.Infof("Switching /dev/nvidia-uvm device major number from %d to %d", dev.Major, info.nvidiaUVMDevMajor) 1089 opts.DevMajor = info.nvidiaUVMDevMajor 1090 } 1091 if err := vfsObj.MkdirAllAt(ctx, path.Dir(dev.Path), root, creds, &vfs.MkdirOptions{ 1092 Mode: 0o755, 1093 }, true /* mustBeDir */); err != nil { 1094 return fmt.Errorf("failed to create ancestor directories of %q: %w", dev.Path, err) 1095 } 1096 // EEXIST is silently ignored; compare 1097 // opencontainers/runc:libcontainer/rootfs_linux.go:createDeviceNode(). 1098 created := true 1099 if err := vfsObj.MknodAt(ctx, creds, &pop, &opts); err != nil && !linuxerr.Equals(linuxerr.EEXIST, err) { 1100 if linuxerr.Equals(linuxerr.EEXIST, err) { 1101 created = false 1102 } else { 1103 return fmt.Errorf("failed to create device file at %q: %w", dev.Path, err) 1104 } 1105 } 1106 if created && (dev.UID != nil || dev.GID != nil) { 1107 var opts vfs.SetStatOptions 1108 if dev.UID != nil { 1109 opts.Stat.Mask |= linux.STATX_UID 1110 opts.Stat.UID = *dev.UID 1111 } 1112 if dev.GID != nil { 1113 opts.Stat.Mask |= linux.STATX_GID 1114 opts.Stat.GID = *dev.GID 1115 } 1116 if err := vfsObj.SetStatAt(ctx, creds, &pop, &opts); err != nil { 1117 return fmt.Errorf("failed to set UID/GID for device file %q: %w", dev.Path, err) 1118 } 1119 } 1120 } 1121 return nil 1122 } 1123 1124 func tpuProxyRegisterDevicesAndCreateFiles(ctx context.Context, info *containerInfo, k *kernel.Kernel, vfsObj *vfs.VirtualFilesystem, a *devtmpfs.Accessor) error { 1125 if !info.conf.TPUProxy { 1126 return nil 1127 } 1128 // At this point /dev/accel just contains the TPU devices have been mounted 1129 // into the sandbox chroot. Enumerate all of them and create sentry devices. 1130 paths, err := filepath.Glob("/dev/accel*") 1131 if err != nil { 1132 return fmt.Errorf("enumerating accel device files: %w", err) 1133 } 1134 for _, path := range paths { 1135 accelDeviceRegex := regexp.MustCompile(`^/dev/accel(\d+)$`) 1136 if ms := accelDeviceRegex.FindStringSubmatch(path); ms != nil { 1137 deviceNum, _ := strconv.ParseUint(ms[1], 10, 32) 1138 if err := accel.Register(vfsObj, uint32(deviceNum)); err != nil { 1139 return fmt.Errorf("registering accel driver: %w", err) 1140 } 1141 if err := accel.CreateDevtmpfsFile(ctx, a, uint32(deviceNum)); err != nil { 1142 return fmt.Errorf("creating accel device file %q: %w", deviceNum, err) 1143 } 1144 } 1145 } 1146 return nil 1147 } 1148 1149 func nvproxyRegisterDevicesAndCreateFiles(ctx context.Context, info *containerInfo, k *kernel.Kernel, vfsObj *vfs.VirtualFilesystem, a *devtmpfs.Accessor) error { 1150 if !specutils.GPUFunctionalityRequested(info.spec, info.conf) { 1151 return nil 1152 } 1153 uvmDevMajor, err := k.VFS().GetDynamicCharDevMajor() 1154 if err != nil { 1155 return fmt.Errorf("reserving device major number for nvidia-uvm: %w", err) 1156 } 1157 if err := nvproxy.Register(vfsObj, uvmDevMajor); err != nil { 1158 return fmt.Errorf("registering nvproxy driver: %w", err) 1159 } 1160 info.nvidiaUVMDevMajor = uvmDevMajor 1161 if info.conf.NVProxyDocker { 1162 // In Docker mode, create all the device files now. 1163 // In non-Docker mode, these are instead created as part of 1164 // `createDeviceFiles`, using the spec's Device list. 1165 nvd, err := specutils.NvidiaDeviceNumbers(info.spec, info.conf) 1166 if err != nil { 1167 return fmt.Errorf("getting nvidia devices: %w", err) 1168 } 1169 if err := nvproxy.CreateDriverDevtmpfsFiles(ctx, a, uvmDevMajor); err != nil { 1170 return fmt.Errorf("creating nvproxy devtmpfs files: %w", err) 1171 } 1172 for _, d := range nvd { 1173 if err := nvproxy.CreateIndexDevtmpfsFile(ctx, a, d); err != nil { 1174 return fmt.Errorf("creating nvproxy devtmpfs file for device %d: %w", d, err) 1175 } 1176 } 1177 } 1178 return nil 1179 }