github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/runsc/boot/vfs.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package boot 16 17 import ( 18 "fmt" 19 "path" 20 "sort" 21 "strings" 22 23 specs "github.com/opencontainers/runtime-spec/specs-go" 24 "github.com/SagerNet/gvisor/pkg/abi/linux" 25 "github.com/SagerNet/gvisor/pkg/cleanup" 26 "github.com/SagerNet/gvisor/pkg/context" 27 "github.com/SagerNet/gvisor/pkg/errors/linuxerr" 28 "github.com/SagerNet/gvisor/pkg/fspath" 29 "github.com/SagerNet/gvisor/pkg/log" 30 "github.com/SagerNet/gvisor/pkg/sentry/devices/memdev" 31 "github.com/SagerNet/gvisor/pkg/sentry/devices/ttydev" 32 "github.com/SagerNet/gvisor/pkg/sentry/devices/tundev" 33 "github.com/SagerNet/gvisor/pkg/sentry/fs/user" 34 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/cgroupfs" 35 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/devpts" 36 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/devtmpfs" 37 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/fuse" 38 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/gofer" 39 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/overlay" 40 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/proc" 41 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/sys" 42 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/tmpfs" 43 "github.com/SagerNet/gvisor/pkg/sentry/fsimpl/verity" 44 "github.com/SagerNet/gvisor/pkg/sentry/inet" 45 "github.com/SagerNet/gvisor/pkg/sentry/kernel" 46 "github.com/SagerNet/gvisor/pkg/sentry/kernel/auth" 47 "github.com/SagerNet/gvisor/pkg/sentry/vfs" 48 "github.com/SagerNet/gvisor/runsc/config" 49 "github.com/SagerNet/gvisor/runsc/specutils" 50 ) 51 52 func registerFilesystems(k *kernel.Kernel) error { 53 ctx := k.SupervisorContext() 54 creds := auth.NewRootCredentials(k.RootUserNamespace()) 55 vfsObj := k.VFS() 56 57 vfsObj.MustRegisterFilesystemType(cgroupfs.Name, &cgroupfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 58 AllowUserMount: true, 59 AllowUserList: true, 60 }) 61 vfsObj.MustRegisterFilesystemType(devpts.Name, &devpts.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 62 AllowUserList: true, 63 // TODO(b/29356795): Users may mount this once the terminals are in a 64 // usable state. 65 AllowUserMount: false, 66 }) 67 vfsObj.MustRegisterFilesystemType(devtmpfs.Name, &devtmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 68 AllowUserMount: true, 69 AllowUserList: true, 70 }) 71 vfsObj.MustRegisterFilesystemType(fuse.Name, &fuse.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 72 AllowUserMount: true, 73 AllowUserList: true, 74 }) 75 vfsObj.MustRegisterFilesystemType(gofer.Name, &gofer.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 76 AllowUserList: true, 77 }) 78 vfsObj.MustRegisterFilesystemType(overlay.Name, &overlay.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 79 AllowUserMount: true, 80 AllowUserList: true, 81 }) 82 vfsObj.MustRegisterFilesystemType(proc.Name, &proc.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 83 AllowUserMount: true, 84 AllowUserList: true, 85 }) 86 vfsObj.MustRegisterFilesystemType(sys.Name, &sys.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 87 AllowUserMount: true, 88 AllowUserList: true, 89 }) 90 vfsObj.MustRegisterFilesystemType(tmpfs.Name, &tmpfs.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 91 AllowUserMount: true, 92 AllowUserList: true, 93 }) 94 vfsObj.MustRegisterFilesystemType(verity.Name, &verity.FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ 95 AllowUserList: true, 96 AllowUserMount: true, 97 }) 98 99 // Setup files in devtmpfs. 100 if err := memdev.Register(vfsObj); err != nil { 101 return fmt.Errorf("registering memdev: %w", err) 102 } 103 if err := ttydev.Register(vfsObj); err != nil { 104 return fmt.Errorf("registering ttydev: %w", err) 105 } 106 tunSupported := tundev.IsNetTunSupported(inet.StackFromContext(ctx)) 107 if tunSupported { 108 if err := tundev.Register(vfsObj); err != nil { 109 return fmt.Errorf("registering tundev: %v", err) 110 } 111 } 112 113 if kernel.FUSEEnabled { 114 if err := fuse.Register(vfsObj); err != nil { 115 return fmt.Errorf("registering fusedev: %w", err) 116 } 117 } 118 119 a, err := devtmpfs.NewAccessor(ctx, vfsObj, creds, devtmpfs.Name) 120 if err != nil { 121 return fmt.Errorf("creating devtmpfs accessor: %w", err) 122 } 123 defer a.Release(ctx) 124 125 if err := a.UserspaceInit(ctx); err != nil { 126 return fmt.Errorf("initializing userspace: %w", err) 127 } 128 if err := memdev.CreateDevtmpfsFiles(ctx, a); err != nil { 129 return fmt.Errorf("creating memdev devtmpfs files: %w", err) 130 } 131 if err := ttydev.CreateDevtmpfsFiles(ctx, a); err != nil { 132 return fmt.Errorf("creating ttydev devtmpfs files: %w", err) 133 } 134 if tunSupported { 135 if err := tundev.CreateDevtmpfsFiles(ctx, a); err != nil { 136 return fmt.Errorf("creating tundev devtmpfs files: %v", err) 137 } 138 } 139 140 if kernel.FUSEEnabled { 141 if err := fuse.CreateDevtmpfsFile(ctx, a); err != nil { 142 return fmt.Errorf("creating fusedev devtmpfs files: %w", err) 143 } 144 } 145 146 return nil 147 } 148 149 func setupContainerVFS2(ctx context.Context, conf *config.Config, mntr *containerMounter, procArgs *kernel.CreateProcessArgs) error { 150 mns, err := mntr.mountAll(conf, procArgs) 151 if err != nil { 152 return fmt.Errorf("failed to setupFS: %w", err) 153 } 154 procArgs.MountNamespaceVFS2 = mns 155 156 // Resolve the executable path from working dir and environment. 157 resolved, err := user.ResolveExecutablePath(ctx, procArgs) 158 if err != nil { 159 return err 160 } 161 procArgs.Filename = resolved 162 return nil 163 } 164 165 func (c *containerMounter) mountAll(conf *config.Config, procArgs *kernel.CreateProcessArgs) (*vfs.MountNamespace, error) { 166 log.Infof("Configuring container's file system with VFS2") 167 168 // Create context with root credentials to mount the filesystem (the current 169 // user may not be privileged enough). 170 rootCreds := auth.NewRootCredentials(procArgs.Credentials.UserNamespace) 171 rootProcArgs := *procArgs 172 rootProcArgs.WorkingDirectory = "/" 173 rootProcArgs.Credentials = rootCreds 174 rootProcArgs.Umask = 0022 175 rootProcArgs.MaxSymlinkTraversals = linux.MaxSymlinkTraversals 176 rootCtx := procArgs.NewContext(c.k) 177 178 mns, err := c.createMountNamespaceVFS2(rootCtx, conf, rootCreds) 179 if err != nil { 180 return nil, fmt.Errorf("creating mount namespace: %w", err) 181 } 182 rootProcArgs.MountNamespaceVFS2 = mns 183 184 root := mns.Root() 185 root.IncRef() 186 defer root.DecRef(rootCtx) 187 if root.Mount().ReadOnly() { 188 // Switch to ReadWrite while we setup submounts. 189 if err := c.k.VFS().SetMountReadOnly(root.Mount(), false); err != nil { 190 return nil, fmt.Errorf(`failed to set mount at "/" readwrite: %w`, err) 191 } 192 // Restore back to ReadOnly at the end. 193 defer func() { 194 if err := c.k.VFS().SetMountReadOnly(root.Mount(), true); err != nil { 195 panic(fmt.Sprintf(`failed to restore mount at "/" back to readonly: %v`, err)) 196 } 197 }() 198 } 199 200 // Mount submounts. 201 if err := c.mountSubmountsVFS2(rootCtx, conf, mns, rootCreds); err != nil { 202 return nil, fmt.Errorf("mounting submounts vfs2: %w", err) 203 } 204 205 return mns, nil 206 } 207 208 // createMountNamespaceVFS2 creates the container's root mount and namespace. 209 func (c *containerMounter) createMountNamespaceVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials) (*vfs.MountNamespace, error) { 210 fd := c.fds.remove() 211 data := p9MountData(fd, conf.FileAccess, true /* vfs2 */) 212 213 // We can't check for overlayfs here because sandbox is chroot'ed and gofer 214 // can only send mount options for specs.Mounts (specs.Root is missing 215 // Options field). So assume root is always on top of overlayfs. 216 data = append(data, "overlayfs_stale_read") 217 218 log.Infof("Mounting root over 9P, ioFD: %d", fd) 219 opts := &vfs.MountOptions{ 220 ReadOnly: c.root.Readonly, 221 GetFilesystemOptions: vfs.GetFilesystemOptions{ 222 Data: strings.Join(data, ","), 223 InternalData: gofer.InternalFilesystemOptions{ 224 UniqueID: "/", 225 }, 226 }, 227 InternalMount: true, 228 } 229 230 fsName := gofer.Name 231 if conf.Overlay && !c.root.Readonly { 232 log.Infof("Adding overlay on top of root") 233 var err error 234 var cleanup func() 235 opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) 236 if err != nil { 237 return nil, fmt.Errorf("mounting root with overlay: %w", err) 238 } 239 defer cleanup() 240 fsName = overlay.Name 241 } 242 243 mns, err := c.k.VFS().NewMountNamespace(ctx, creds, "", fsName, opts) 244 if err != nil { 245 return nil, fmt.Errorf("setting up mount namespace: %w", err) 246 } 247 return mns, nil 248 } 249 250 // configureOverlay mounts the lower layer using "lowerOpts", mounts the upper 251 // layer using tmpfs, and return overlay mount options. "cleanup" must be called 252 // after the options have been used to mount the overlay, to release refs on 253 // lower and upper mounts. 254 func (c *containerMounter) configureOverlay(ctx context.Context, creds *auth.Credentials, lowerOpts *vfs.MountOptions, lowerFSName string) (*vfs.MountOptions, func(), error) { 255 // First copy options from lower layer to upper layer and overlay. Clear 256 // filesystem specific options. 257 upperOpts := *lowerOpts 258 upperOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{} 259 260 overlayOpts := *lowerOpts 261 overlayOpts.GetFilesystemOptions = vfs.GetFilesystemOptions{} 262 263 // All writes go to the upper layer, be paranoid and make lower readonly. 264 lowerOpts.ReadOnly = true 265 lower, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, lowerFSName, lowerOpts) 266 if err != nil { 267 return nil, nil, err 268 } 269 cu := cleanup.Make(func() { lower.DecRef(ctx) }) 270 defer cu.Clean() 271 272 // Determine the lower layer's root's type. 273 lowerRootVD := vfs.MakeVirtualDentry(lower, lower.Root()) 274 stat, err := c.k.VFS().StatAt(ctx, creds, &vfs.PathOperation{ 275 Root: lowerRootVD, 276 Start: lowerRootVD, 277 }, &vfs.StatOptions{ 278 Mask: linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE | linux.STATX_TYPE, 279 }) 280 if err != nil { 281 return nil, nil, fmt.Errorf("failed to stat lower layer's root: %v", err) 282 } 283 if stat.Mask&linux.STATX_TYPE == 0 { 284 return nil, nil, fmt.Errorf("failed to get file type of lower layer's root") 285 } 286 rootType := stat.Mode & linux.S_IFMT 287 if rootType != linux.S_IFDIR && rootType != linux.S_IFREG { 288 return nil, nil, fmt.Errorf("lower layer's root has unsupported file type %v", rootType) 289 } 290 291 // Upper is a tmpfs mount to keep all modifications inside the sandbox. 292 upperOpts.GetFilesystemOptions.InternalData = tmpfs.FilesystemOpts{ 293 RootFileType: uint16(rootType), 294 } 295 upper, err := c.k.VFS().MountDisconnected(ctx, creds, "" /* source */, tmpfs.Name, &upperOpts) 296 if err != nil { 297 return nil, nil, fmt.Errorf("failed to create upper layer for overlay, opts: %+v: %v", upperOpts, err) 298 } 299 cu.Add(func() { upper.DecRef(ctx) }) 300 301 // If the overlay mount consists of a regular file, copy up its contents 302 // from the lower layer, since in the overlay the otherwise-empty upper 303 // layer file will take precedence. 304 upperRootVD := vfs.MakeVirtualDentry(upper, upper.Root()) 305 if rootType == linux.S_IFREG { 306 lowerFD, err := c.k.VFS().OpenAt(ctx, creds, &vfs.PathOperation{ 307 Root: lowerRootVD, 308 Start: lowerRootVD, 309 }, &vfs.OpenOptions{ 310 Flags: linux.O_RDONLY, 311 }) 312 if err != nil { 313 return nil, nil, fmt.Errorf("failed to open lower layer root for copying: %v", err) 314 } 315 defer lowerFD.DecRef(ctx) 316 upperFD, err := c.k.VFS().OpenAt(ctx, creds, &vfs.PathOperation{ 317 Root: upperRootVD, 318 Start: upperRootVD, 319 }, &vfs.OpenOptions{ 320 Flags: linux.O_WRONLY, 321 }) 322 if err != nil { 323 return nil, nil, fmt.Errorf("failed to open upper layer root for copying: %v", err) 324 } 325 defer upperFD.DecRef(ctx) 326 if _, err := vfs.CopyRegularFileData(ctx, upperFD, lowerFD); err != nil { 327 return nil, nil, fmt.Errorf("failed to copy up overlay file: %v", err) 328 } 329 } 330 331 // Propagate the lower layer's root's owner, group, and mode to the upper 332 // layer's root for consistency with VFS1. 333 err = c.k.VFS().SetStatAt(ctx, creds, &vfs.PathOperation{ 334 Root: upperRootVD, 335 Start: upperRootVD, 336 }, &vfs.SetStatOptions{ 337 Stat: linux.Statx{ 338 Mask: (linux.STATX_UID | linux.STATX_GID | linux.STATX_MODE) & stat.Mask, 339 UID: stat.UID, 340 GID: stat.GID, 341 Mode: stat.Mode, 342 }, 343 }) 344 if err != nil { 345 return nil, nil, err 346 } 347 348 // Configure overlay with both layers. 349 overlayOpts.GetFilesystemOptions.InternalData = overlay.FilesystemOptions{ 350 UpperRoot: upperRootVD, 351 LowerRoots: []vfs.VirtualDentry{lowerRootVD}, 352 } 353 return &overlayOpts, cu.Release(), nil 354 } 355 356 func (c *containerMounter) mountSubmountsVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials) error { 357 mounts, err := c.prepareMountsVFS2() 358 if err != nil { 359 return err 360 } 361 362 for i := range mounts { 363 submount := &mounts[i] 364 log.Debugf("Mounting %q to %q, type: %s, options: %s", submount.mount.Source, submount.mount.Destination, submount.mount.Type, submount.mount.Options) 365 var ( 366 mnt *vfs.Mount 367 err error 368 ) 369 370 if hint := c.hints.findMount(submount.mount); hint != nil && hint.isSupported() { 371 mnt, err = c.mountSharedSubmountVFS2(ctx, conf, mns, creds, submount.mount, hint) 372 if err != nil { 373 return fmt.Errorf("mount shared mount %q to %q: %v", hint.name, submount.mount.Destination, err) 374 } 375 } else { 376 mnt, err = c.mountSubmountVFS2(ctx, conf, mns, creds, submount) 377 if err != nil { 378 return fmt.Errorf("mount submount %q: %w", submount.mount.Destination, err) 379 } 380 } 381 382 if mnt != nil && mnt.ReadOnly() { 383 // Switch to ReadWrite while we setup submounts. 384 if err := c.k.VFS().SetMountReadOnly(mnt, false); err != nil { 385 return fmt.Errorf("failed to set mount at %q readwrite: %w", submount.mount.Destination, err) 386 } 387 // Restore back to ReadOnly at the end. 388 defer func() { 389 if err := c.k.VFS().SetMountReadOnly(mnt, true); err != nil { 390 panic(fmt.Sprintf("failed to restore mount at %q back to readonly: %v", submount.mount.Destination, err)) 391 } 392 }() 393 } 394 } 395 396 if err := c.mountTmpVFS2(ctx, conf, creds, mns); err != nil { 397 return fmt.Errorf(`mount submount "\tmp": %w`, err) 398 } 399 return nil 400 } 401 402 type mountAndFD struct { 403 mount *specs.Mount 404 fd int 405 } 406 407 func (c *containerMounter) prepareMountsVFS2() ([]mountAndFD, error) { 408 // Associate bind mounts with their FDs before sorting since there is an 409 // undocumented assumption that FDs are dispensed in the order in which 410 // they are required by mounts. 411 var mounts []mountAndFD 412 for i := range c.mounts { 413 m := &c.mounts[i] 414 specutils.MaybeConvertToBindMount(m) 415 416 // Only bind mounts use host FDs; see 417 // containerMounter.getMountNameAndOptionsVFS2. 418 fd := -1 419 if m.Type == bind { 420 fd = c.fds.remove() 421 } 422 mounts = append(mounts, mountAndFD{ 423 mount: m, 424 fd: fd, 425 }) 426 } 427 if err := c.checkDispenser(); err != nil { 428 return nil, err 429 } 430 431 // Sort the mounts so that we don't place children before parents. 432 sort.Slice(mounts, func(i, j int) bool { 433 return len(mounts[i].mount.Destination) < len(mounts[j].mount.Destination) 434 }) 435 436 return mounts, nil 437 } 438 439 func (c *containerMounter) mountSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, submount *mountAndFD) (*vfs.Mount, error) { 440 fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, submount) 441 if err != nil { 442 return nil, fmt.Errorf("mountOptions failed: %w", err) 443 } 444 if len(fsName) == 0 { 445 // Filesystem is not supported (e.g. cgroup), just skip it. 446 return nil, nil 447 } 448 449 if err := c.makeMountPoint(ctx, creds, mns, submount.mount.Destination); err != nil { 450 return nil, fmt.Errorf("creating mount point %q: %w", submount.mount.Destination, err) 451 } 452 453 if useOverlay { 454 log.Infof("Adding overlay on top of mount %q", submount.mount.Destination) 455 var cleanup func() 456 opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) 457 if err != nil { 458 return nil, fmt.Errorf("mounting volume with overlay at %q: %w", submount.mount.Destination, err) 459 } 460 defer cleanup() 461 fsName = overlay.Name 462 } 463 464 root := mns.Root() 465 root.IncRef() 466 defer root.DecRef(ctx) 467 target := &vfs.PathOperation{ 468 Root: root, 469 Start: root, 470 Path: fspath.Parse(submount.mount.Destination), 471 } 472 mnt, err := c.k.VFS().MountAt(ctx, creds, "", target, fsName, opts) 473 if err != nil { 474 return nil, fmt.Errorf("failed to mount %q (type: %s): %w, opts: %v", submount.mount.Destination, submount.mount.Type, err, opts) 475 } 476 log.Infof("Mounted %q to %q type: %s, internal-options: %q", submount.mount.Source, submount.mount.Destination, submount.mount.Type, opts.GetFilesystemOptions.Data) 477 return mnt, nil 478 } 479 480 // getMountNameAndOptionsVFS2 retrieves the fsName, opts, and useOverlay values 481 // used for mounts. 482 func (c *containerMounter) getMountNameAndOptionsVFS2(conf *config.Config, m *mountAndFD) (string, *vfs.MountOptions, bool, error) { 483 fsName := m.mount.Type 484 useOverlay := false 485 var ( 486 data []string 487 internalData interface{} 488 ) 489 490 verityData, verityOpts, verityRequested, remainingMOpts, err := parseVerityMountOptions(m.mount.Options) 491 if err != nil { 492 return "", nil, false, err 493 } 494 m.mount.Options = remainingMOpts 495 496 // Find filesystem name and FS specific data field. 497 switch m.mount.Type { 498 case devpts.Name, devtmpfs.Name, proc.Name, sys.Name: 499 // Nothing to do. 500 501 case nonefs: 502 fsName = sys.Name 503 504 case tmpfs.Name: 505 var err error 506 data, err = parseAndFilterOptions(m.mount.Options, tmpfsAllowedData...) 507 if err != nil { 508 return "", nil, false, err 509 } 510 511 case bind: 512 fsName = gofer.Name 513 if m.fd == 0 { 514 // Check that an FD was provided to fails fast. Technically FD=0 is valid, 515 // but unlikely to be correct in this context. 516 return "", nil, false, fmt.Errorf("9P mount requires a connection FD") 517 } 518 data = p9MountData(m.fd, c.getMountAccessType(conf, m.mount), true /* vfs2 */) 519 internalData = gofer.InternalFilesystemOptions{ 520 UniqueID: m.mount.Destination, 521 } 522 523 // If configured, add overlay to all writable mounts. 524 useOverlay = conf.Overlay && !mountFlags(m.mount.Options).ReadOnly 525 526 case cgroupfs.Name: 527 var err error 528 data, err = parseAndFilterOptions(m.mount.Options, cgroupfs.SupportedMountOptions...) 529 if err != nil { 530 return "", nil, false, err 531 } 532 533 default: 534 log.Warningf("ignoring unknown filesystem type %q", m.mount.Type) 535 return "", nil, false, nil 536 } 537 538 opts := &vfs.MountOptions{ 539 GetFilesystemOptions: vfs.GetFilesystemOptions{ 540 Data: strings.Join(data, ","), 541 InternalData: internalData, 542 }, 543 InternalMount: true, 544 } 545 546 for _, o := range m.mount.Options { 547 switch o { 548 case "rw": 549 opts.ReadOnly = false 550 case "ro": 551 opts.ReadOnly = true 552 case "noatime": 553 opts.Flags.NoATime = true 554 case "noexec": 555 opts.Flags.NoExec = true 556 case "bind", "rbind": 557 // These are the same as a mount with type="bind". 558 default: 559 log.Warningf("ignoring unknown mount option %q", o) 560 } 561 } 562 563 if verityRequested { 564 verityData = verityData + "root_name=" + path.Base(m.mount.Destination) 565 verityOpts.LowerName = fsName 566 verityOpts.LowerGetFSOptions = opts.GetFilesystemOptions 567 fsName = verity.Name 568 opts = &vfs.MountOptions{ 569 GetFilesystemOptions: vfs.GetFilesystemOptions{ 570 Data: verityData, 571 InternalData: verityOpts, 572 }, 573 InternalMount: true, 574 } 575 } 576 577 return fsName, opts, useOverlay, nil 578 } 579 580 func parseKeyValue(s string) (string, string, bool) { 581 tokens := strings.SplitN(s, "=", 2) 582 if len(tokens) < 2 { 583 return "", "", false 584 } 585 return strings.TrimSpace(tokens[0]), strings.TrimSpace(tokens[1]), true 586 } 587 588 // parseAndFilterOptions scans the provided mount options for verity-related 589 // mount options. It returns the parsed set of verity mount options, as well as 590 // the filtered set of mount options unrelated to verity. 591 func parseVerityMountOptions(mopts []string) (string, verity.InternalFilesystemOptions, bool, []string, error) { 592 nonVerity := []string{} 593 found := false 594 var rootHash string 595 verityOpts := verity.InternalFilesystemOptions{ 596 Action: verity.PanicOnViolation, 597 } 598 for _, o := range mopts { 599 if !strings.HasPrefix(o, "verity.") { 600 nonVerity = append(nonVerity, o) 601 continue 602 } 603 604 k, v, ok := parseKeyValue(o) 605 if !ok { 606 return "", verityOpts, found, nonVerity, fmt.Errorf("invalid verity mount option with no value: %q", o) 607 } 608 609 found = true 610 switch k { 611 case "verity.roothash": 612 rootHash = v 613 case "verity.action": 614 switch v { 615 case "error": 616 verityOpts.Action = verity.ErrorOnViolation 617 case "panic": 618 verityOpts.Action = verity.PanicOnViolation 619 default: 620 log.Warningf("Invalid verity action %q", v) 621 verityOpts.Action = verity.PanicOnViolation 622 } 623 default: 624 return "", verityOpts, found, nonVerity, fmt.Errorf("unknown verity mount option: %q", k) 625 } 626 } 627 verityOpts.AllowRuntimeEnable = len(rootHash) == 0 628 verityData := "root_hash=" + rootHash + "," 629 return verityData, verityOpts, found, nonVerity, nil 630 } 631 632 // mountTmpVFS2 mounts an internal tmpfs at '/tmp' if it's safe to do so. 633 // Technically we don't have to mount tmpfs at /tmp, as we could just rely on 634 // the host /tmp, but this is a nice optimization, and fixes some apps that call 635 // mknod in /tmp. It's unsafe to mount tmpfs if: 636 // 1. /tmp is mounted explicitly: we should not override user's wish 637 // 2. /tmp is not empty: mounting tmpfs would hide existing files in /tmp 638 // 639 // Note that when there are submounts inside of '/tmp', directories for the 640 // mount points must be present, making '/tmp' not empty anymore. 641 func (c *containerMounter) mountTmpVFS2(ctx context.Context, conf *config.Config, creds *auth.Credentials, mns *vfs.MountNamespace) error { 642 for _, m := range c.mounts { 643 // m.Destination has been cleaned, so it's to use equality here. 644 if m.Destination == "/tmp" { 645 log.Debugf(`Explict "/tmp" mount found, skipping internal tmpfs, mount: %+v`, m) 646 return nil 647 } 648 } 649 650 root := mns.Root() 651 root.IncRef() 652 defer root.DecRef(ctx) 653 pop := vfs.PathOperation{ 654 Root: root, 655 Start: root, 656 Path: fspath.Parse("/tmp"), 657 } 658 fd, err := c.k.VFS().OpenAt(ctx, creds, &pop, &vfs.OpenOptions{Flags: linux.O_RDONLY | linux.O_DIRECTORY}) 659 switch { 660 case err == nil: 661 defer fd.DecRef(ctx) 662 663 err := fd.IterDirents(ctx, vfs.IterDirentsCallbackFunc(func(dirent vfs.Dirent) error { 664 if dirent.Name != "." && dirent.Name != ".." { 665 return linuxerr.ENOTEMPTY 666 } 667 return nil 668 })) 669 switch { 670 case err == nil: 671 log.Infof(`Mounting internal tmpfs on top of empty "/tmp"`) 672 case linuxerr.Equals(linuxerr.ENOTEMPTY, err): 673 // If more than "." and ".." is found, skip internal tmpfs to prevent 674 // hiding existing files. 675 log.Infof(`Skipping internal tmpfs mount for "/tmp" because it's not empty`) 676 return nil 677 default: 678 return err 679 } 680 fallthrough 681 682 case linuxerr.Equals(linuxerr.ENOENT, err): 683 // No '/tmp' found (or fallthrough from above). It's safe to mount internal 684 // tmpfs. 685 tmpMount := specs.Mount{ 686 Type: tmpfs.Name, 687 Destination: "/tmp", 688 // Sticky bit is added to prevent accidental deletion of files from 689 // another user. This is normally done for /tmp. 690 Options: []string{"mode=01777"}, 691 } 692 _, err := c.mountSubmountVFS2(ctx, conf, mns, creds, &mountAndFD{mount: &tmpMount}) 693 return err 694 695 case linuxerr.Equals(linuxerr.ENOTDIR, err): 696 // Not a dir?! Let it be. 697 return nil 698 699 default: 700 return fmt.Errorf(`opening "/tmp" inside container: %w`, err) 701 } 702 } 703 704 // processHintsVFS2 processes annotations that container hints about how volumes 705 // should be mounted (e.g. a volume shared between containers). It must be 706 // called for the root container only. 707 func (c *containerMounter) processHintsVFS2(conf *config.Config, creds *auth.Credentials) error { 708 ctx := c.k.SupervisorContext() 709 for _, hint := range c.hints.mounts { 710 // TODO(b/142076984): Only support tmpfs for now. Bind mounts require a 711 // common gofer to mount all shared volumes. 712 if hint.mount.Type != tmpfs.Name { 713 continue 714 } 715 716 log.Infof("Mounting master of shared mount %q from %q type %q", hint.name, hint.mount.Source, hint.mount.Type) 717 mnt, err := c.mountSharedMasterVFS2(ctx, conf, hint, creds) 718 if err != nil { 719 return fmt.Errorf("mounting shared master %q: %v", hint.name, err) 720 } 721 hint.vfsMount = mnt 722 } 723 return nil 724 } 725 726 // mountSharedMasterVFS2 mounts the master of a volume that is shared among 727 // containers in a pod. 728 func (c *containerMounter) mountSharedMasterVFS2(ctx context.Context, conf *config.Config, hint *mountHint, creds *auth.Credentials) (*vfs.Mount, error) { 729 // Map mount type to filesystem name, and parse out the options that we are 730 // capable of dealing with. 731 mntFD := &mountAndFD{mount: &hint.mount} 732 fsName, opts, useOverlay, err := c.getMountNameAndOptionsVFS2(conf, mntFD) 733 if err != nil { 734 return nil, err 735 } 736 if len(fsName) == 0 { 737 return nil, fmt.Errorf("mount type not supported %q", hint.mount.Type) 738 } 739 740 if useOverlay { 741 log.Infof("Adding overlay on top of shared mount %q", mntFD.mount.Destination) 742 var cleanup func() 743 opts, cleanup, err = c.configureOverlay(ctx, creds, opts, fsName) 744 if err != nil { 745 return nil, fmt.Errorf("mounting shared volume with overlay at %q: %w", mntFD.mount.Destination, err) 746 } 747 defer cleanup() 748 fsName = overlay.Name 749 } 750 751 return c.k.VFS().MountDisconnected(ctx, creds, "", fsName, opts) 752 } 753 754 // mountSharedSubmount binds mount to a previously mounted volume that is shared 755 // among containers in the same pod. 756 func (c *containerMounter) mountSharedSubmountVFS2(ctx context.Context, conf *config.Config, mns *vfs.MountNamespace, creds *auth.Credentials, mount *specs.Mount, source *mountHint) (*vfs.Mount, error) { 757 if err := source.checkCompatible(mount); err != nil { 758 return nil, err 759 } 760 761 // Ignore data and useOverlay because these were already applied to 762 // the master mount. 763 _, opts, _, err := c.getMountNameAndOptionsVFS2(conf, &mountAndFD{mount: mount}) 764 if err != nil { 765 return nil, err 766 } 767 newMnt, err := c.k.VFS().NewDisconnectedMount(source.vfsMount.Filesystem(), source.vfsMount.Root(), opts) 768 if err != nil { 769 return nil, err 770 } 771 defer newMnt.DecRef(ctx) 772 773 root := mns.Root() 774 root.IncRef() 775 defer root.DecRef(ctx) 776 target := &vfs.PathOperation{ 777 Root: root, 778 Start: root, 779 Path: fspath.Parse(mount.Destination), 780 } 781 782 if err := c.makeMountPoint(ctx, creds, mns, mount.Destination); err != nil { 783 return nil, fmt.Errorf("creating mount point %q: %w", mount.Destination, err) 784 } 785 786 if err := c.k.VFS().ConnectMountAt(ctx, creds, newMnt, target); err != nil { 787 return nil, err 788 } 789 log.Infof("Mounted %q type shared bind to %q", mount.Destination, source.name) 790 return newMnt, nil 791 } 792 793 func (c *containerMounter) makeMountPoint(ctx context.Context, creds *auth.Credentials, mns *vfs.MountNamespace, dest string) error { 794 root := mns.Root() 795 root.IncRef() 796 defer root.DecRef(ctx) 797 target := &vfs.PathOperation{ 798 Root: root, 799 Start: root, 800 Path: fspath.Parse(dest), 801 } 802 // First check if mount point exists. When overlay is enabled, gofer doesn't 803 // allow changes to the FS, making MakeSytheticMountpoint() ineffective 804 // because MkdirAt fails with EROFS even if file exists. 805 vd, err := c.k.VFS().GetDentryAt(ctx, creds, target, &vfs.GetDentryOptions{}) 806 if err == nil { 807 // File exists, we're done. 808 vd.DecRef(ctx) 809 return nil 810 } 811 return c.k.VFS().MakeSyntheticMountpoint(ctx, dest, root, creds) 812 } 813 814 // configureRestore returns an updated context.Context including filesystem 815 // state used by restore defined by conf. 816 func (c *containerMounter) configureRestore(ctx context.Context) (context.Context, error) { 817 fdmap := make(map[string]int) 818 fdmap["/"] = c.fds.remove() 819 mounts, err := c.prepareMountsVFS2() 820 if err != nil { 821 return ctx, err 822 } 823 for i := range c.mounts { 824 submount := &mounts[i] 825 if submount.fd >= 0 { 826 fdmap[submount.mount.Destination] = submount.fd 827 } 828 } 829 return context.WithValue(ctx, gofer.CtxRestoreServerFDMap, fdmap), nil 830 }