github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/vfs/mount.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs 16 17 import ( 18 "bytes" 19 "fmt" 20 "math" 21 "sort" 22 "strings" 23 24 "github.com/MerlinKodo/gvisor/pkg/abi/linux" 25 "github.com/MerlinKodo/gvisor/pkg/atomicbitops" 26 "github.com/MerlinKodo/gvisor/pkg/context" 27 "github.com/MerlinKodo/gvisor/pkg/errors/linuxerr" 28 "github.com/MerlinKodo/gvisor/pkg/refs" 29 "github.com/MerlinKodo/gvisor/pkg/sentry/kernel/auth" 30 ) 31 32 // MountMax is the maximum number of mounts allowed. In Linux this can be 33 // configured by the user at /proc/sys/fs/mount-max, but the default is 34 // 100,000. We set the gVisor limit to 10,000. 35 const MountMax = 10000 36 37 // A Mount is a replacement of a Dentry (Mount.key.point) from one Filesystem 38 // (Mount.key.parent.fs) with a Dentry (Mount.root) from another Filesystem 39 // (Mount.fs), which applies to path resolution in the context of a particular 40 // Mount (Mount.key.parent). 41 // 42 // Mounts are reference-counted. Unless otherwise specified, all Mount methods 43 // require that a reference is held. 44 // 45 // Mount and Filesystem are distinct types because it's possible for a single 46 // Filesystem to be mounted at multiple locations and/or in multiple mount 47 // namespaces. 48 // 49 // Mount is analogous to Linux's struct mount. (gVisor does not distinguish 50 // between struct mount and struct vfsmount.) 51 // 52 // +stateify savable 53 type Mount struct { 54 // vfs, fs, root are immutable. References are held on fs and root. 55 // Note that for a disconnected mount, root may be nil. 56 // 57 // Invariant: if not nil, root belongs to fs. 58 vfs *VirtualFilesystem 59 fs *Filesystem 60 root *Dentry 61 62 // ID is the immutable mount ID. 63 ID uint64 64 65 // Flags contains settings as specified for mount(2), e.g. MS_NOEXEC, except 66 // for MS_RDONLY which is tracked in "writers". Immutable. 67 Flags MountFlags 68 69 // key is protected by VirtualFilesystem.mountMu and 70 // VirtualFilesystem.mounts.seq, and may be nil. References are held on 71 // key.parent and key.point if they are not nil. 72 // 73 // Invariant: key.parent != nil iff key.point != nil. key.point belongs to 74 // key.parent.fs. 75 key mountKey `state:".(VirtualDentry)"` 76 77 // ns is the namespace in which this Mount was mounted. ns is protected by 78 // VirtualFilesystem.mountMu. 79 ns *MountNamespace 80 81 // The lower 63 bits of refs are a reference count. The MSB of refs is set 82 // if the Mount has been eagerly umounted, as by umount(2) without the 83 // MNT_DETACH flag. refs is accessed using atomic memory operations. 84 refs atomicbitops.Int64 85 86 // children is the set of all Mounts for which Mount.key.parent is this 87 // Mount. children is protected by VirtualFilesystem.mountMu. 88 children map[*Mount]struct{} 89 90 // isShared indicates this mount has the MS_SHARED propagation type. 
91 isShared bool 92 93 // sharedEntry represents an entry in a circular list (ring) of mounts in a 94 // shared peer group. 95 sharedEntry mountEntry 96 97 // groupID is the ID for this mount's shared peer group. If the mount is not 98 // in a peer group, this is 0. 99 groupID uint32 100 101 // umounted is true if VFS.umountRecursiveLocked() has been called on this 102 // Mount. VirtualFilesystem does not hold a reference on Mounts for which 103 // umounted is true. umounted is protected by VirtualFilesystem.mountMu. 104 umounted bool 105 106 // The lower 63 bits of writers is the number of calls to 107 // Mount.CheckBeginWrite() that have not yet been paired with a call to 108 // Mount.EndWrite(). The MSB of writers is set if MS_RDONLY is in effect. 109 // writers is accessed using atomic memory operations. 110 writers atomicbitops.Int64 111 112 // pendingChildren is a list of new child mounts that have not yet been 113 // connected to this mount as the parent. 114 pendingChildren []*Mount 115 } 116 117 func newMount(vfs *VirtualFilesystem, fs *Filesystem, root *Dentry, mntns *MountNamespace, opts *MountOptions) *Mount { 118 mnt := &Mount{ 119 ID: vfs.lastMountID.Add(1), 120 Flags: opts.Flags, 121 vfs: vfs, 122 fs: fs, 123 root: root, 124 ns: mntns, 125 isShared: false, 126 refs: atomicbitops.FromInt64(1), 127 } 128 if opts.ReadOnly { 129 mnt.setReadOnlyLocked(true) 130 } 131 mnt.sharedEntry.Init(mnt) 132 refs.Register(mnt) 133 return mnt 134 } 135 136 // Options returns a copy of the MountOptions currently applicable to mnt. 137 func (mnt *Mount) Options() MountOptions { 138 mnt.vfs.lockMounts() 139 defer mnt.vfs.unlockMounts(context.Background()) 140 return MountOptions{ 141 Flags: mnt.Flags, 142 ReadOnly: mnt.ReadOnly(), 143 } 144 } 145 146 func (mnt *Mount) generateOptionalTags() string { 147 mnt.vfs.lockMounts() 148 defer mnt.vfs.unlockMounts(context.Background()) 149 // TODO(b/249777195): Support MS_SLAVE and MS_UNBINDABLE propagation types. 150 var optional string 151 if mnt.isShared { 152 optional = fmt.Sprintf("shared:%d", mnt.groupID) 153 } 154 return optional 155 } 156 157 // coveringMount returns a mount that completely covers mnt if it exists and nil 158 // otherwise. A mount that covers another is one that is the only child of its 159 // parent and whose mountpoint is its parent's root. 160 func (mnt *Mount) coveringMount() *Mount { 161 if len(mnt.children) != 1 { 162 return nil 163 } 164 // Get the child from the children map. 165 var child *Mount 166 for child = range mnt.children { 167 break 168 } 169 if child.point() != mnt.root { 170 return nil 171 } 172 return child 173 } 174 175 // NewFilesystem creates a new filesystem object not yet associated with any 176 // mounts. It can be installed into the filesystem tree with ConnectMountAt. 177 // Note that only the filesystem-specific mount options from opts are used by 178 // this function, mount flags are ignored. To set mount flags, pass them to a 179 // corresponding ConnectMountAt. 
180 func (vfs *VirtualFilesystem) NewFilesystem(ctx context.Context, creds *auth.Credentials, source, fsTypeName string, opts *MountOptions) (*Filesystem, *Dentry, error) { 181 rft := vfs.getFilesystemType(fsTypeName) 182 if rft == nil { 183 return nil, nil, linuxerr.ENODEV 184 } 185 if !opts.InternalMount && !rft.opts.AllowUserMount { 186 return nil, nil, linuxerr.ENODEV 187 } 188 return rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions) 189 } 190 191 // NewDisconnectedMount returns a Mount representing fs with the given root 192 // (which may be nil). The new Mount is not associated with any MountNamespace 193 // and is not connected to any other Mounts. References are taken on fs and 194 // root. 195 func (vfs *VirtualFilesystem) NewDisconnectedMount(fs *Filesystem, root *Dentry, opts *MountOptions) *Mount { 196 fs.IncRef() 197 if root != nil { 198 root.IncRef() 199 } 200 return newMount(vfs, fs, root, nil /* mntns */, opts) 201 } 202 203 // MountDisconnected creates a Filesystem configured by the given arguments, 204 // then returns a Mount representing it. The new Mount is not associated with 205 // any MountNamespace and is not connected to any other Mounts. 206 func (vfs *VirtualFilesystem) MountDisconnected(ctx context.Context, creds *auth.Credentials, source string, fsTypeName string, opts *MountOptions) (*Mount, error) { 207 fs, root, err := vfs.NewFilesystem(ctx, creds, source, fsTypeName, opts) 208 if err != nil { 209 return nil, err 210 } 211 return newMount(vfs, fs, root, nil /* mntns */, opts), nil 212 } 213 214 // ConnectMountAt connects mnt at the path represented by target. 215 // 216 // Preconditions: mnt must be disconnected. 217 func (vfs *VirtualFilesystem) ConnectMountAt(ctx context.Context, creds *auth.Credentials, mnt *Mount, target *PathOperation) error { 218 // We can't hold vfs.mountMu while calling FilesystemImpl methods due to 219 // lock ordering. 220 vd, err := vfs.GetDentryAt(ctx, creds, target, &GetDentryOptions{}) 221 if err != nil { 222 return err 223 } 224 vfs.lockMounts() 225 defer vfs.unlockMounts(ctx) 226 // This is equivalent to checking for SB_NOUSER in Linux, which is set on all 227 // anon mounts and sentry-internal filesystems like pipefs. 228 if vd.mount.ns == nil { 229 vfs.delayDecRef(vd) 230 return linuxerr.EINVAL 231 } 232 tree := vfs.preparePropagationTree(mnt, vd) 233 // Check if the new mount + all the propagation mounts puts us over the max. 234 if uint32(len(tree)+1)+vd.mount.ns.mounts > MountMax { 235 // We need to unlock mountMu first because DecRef takes a lock on the 236 // filesystem mutex in some implementations, which can lead to circular 237 // locking. 238 vfs.abortPropagationTree(ctx, tree) 239 vfs.delayDecRef(vd) 240 return linuxerr.ENOSPC 241 } 242 if err := vfs.connectMountAtLocked(ctx, mnt, vd); err != nil { 243 vfs.abortPropagationTree(ctx, tree) 244 return err 245 } 246 vfs.commitPropagationTree(ctx, tree) 247 return nil 248 } 249 250 // connectMountAtLocked attaches mnt at vd. This method consumes a reference on 251 // vd and returns a list of VirtualDentry with an extra reference that must be 252 // DecRef'd outside of vfs.mountMu. 253 // 254 // Preconditions: 255 // - mnt must be disconnected. 256 // - vfs.mountMu must be locked. 
257 // 258 // +checklocks:vfs.mountMu 259 func (vfs *VirtualFilesystem) connectMountAtLocked(ctx context.Context, mnt *Mount, vd VirtualDentry) error { 260 vd.dentry.mu.Lock() 261 for { 262 if vd.mount.umounted || vd.dentry.dead { 263 vd.dentry.mu.Unlock() 264 vfs.delayDecRef(vd) 265 return linuxerr.ENOENT 266 } 267 // vd might have been mounted over between vfs.GetDentryAt() and 268 // vfs.mountMu.Lock(). 269 if !vd.dentry.isMounted() { 270 break 271 } 272 nextmnt := vfs.mounts.Lookup(vd.mount, vd.dentry) 273 if nextmnt == nil { 274 break 275 } 276 // It's possible that nextmnt has been umounted but not disconnected, 277 // in which case vfs no longer holds a reference on it, and the last 278 // reference may be concurrently dropped even though we're holding 279 // vfs.mountMu. 280 if !nextmnt.tryIncMountedRef() { 281 break 282 } 283 // This can't fail since we're holding vfs.mountMu. 284 nextmnt.root.IncRef() 285 vd.dentry.mu.Unlock() 286 vfs.delayDecRef(vd) 287 vd = VirtualDentry{ 288 mount: nextmnt, 289 dentry: nextmnt.root, 290 } 291 vd.dentry.mu.Lock() 292 } 293 // TODO(gvisor.dev/issue/1035): Linux requires that either both the mount 294 // point and the mount root are directories, or neither are, and returns 295 // ENOTDIR if this is not the case. 296 mntns := vd.mount.ns 297 vfs.mounts.seq.BeginWrite() 298 vfs.connectLocked(mnt, vd, mntns) 299 vfs.mounts.seq.EndWrite() 300 vd.dentry.mu.Unlock() 301 return nil 302 } 303 304 // CloneMountAt returns a new mount with the same fs, specified root and 305 // mount options. If mnt's propagation type is shared the new mount is 306 // automatically made a peer of mnt. If mount options are nil, mnt's 307 // options are copied. 308 func (vfs *VirtualFilesystem) CloneMountAt(mnt *Mount, root *Dentry, mopts *MountOptions) *Mount { 309 vfs.lockMounts() 310 defer vfs.unlockMounts(context.Background()) 311 clone := vfs.cloneMount(mnt, root, mopts) 312 return clone 313 } 314 315 // cloneMount returns a new mount with mnt.fs as the filesystem and root as the 316 // root. The returned mount has an extra reference. 317 // 318 // +checklocks:vfs.mountMu 319 // +checklocksalias:mnt.vfs.mountMu=vfs.mountMu 320 func (vfs *VirtualFilesystem) cloneMount(mnt *Mount, root *Dentry, mopts *MountOptions) *Mount { 321 opts := mopts 322 if opts == nil { 323 opts = &MountOptions{ 324 Flags: mnt.Flags, 325 ReadOnly: mnt.ReadOnly(), 326 } 327 } 328 clone := vfs.NewDisconnectedMount(mnt.fs, root, opts) 329 if mnt.isShared { 330 vfs.addPeer(mnt, clone) 331 } 332 return clone 333 } 334 335 type cloneTreeNode struct { 336 prevMount *Mount 337 parentMount *Mount 338 } 339 340 // cloneMountTree creates a copy of mnt's tree with the specified root 341 // dentry at root. The new descendents are added to mnt's pending mount list. 
342 // 343 // +checklocks:vfs.mountMu 344 func (vfs *VirtualFilesystem) cloneMountTree(ctx context.Context, mnt *Mount, root *Dentry) (*Mount, error) { 345 clone := vfs.cloneMount(mnt, root, nil) 346 queue := []cloneTreeNode{{mnt, clone}} 347 for len(queue) != 0 { 348 p := queue[len(queue)-1] 349 queue = queue[:len(queue)-1] 350 for c := range p.prevMount.children { 351 m := vfs.cloneMount(c, c.root, nil) 352 mp := VirtualDentry{ 353 mount: p.parentMount, 354 dentry: c.point(), 355 } 356 mp.IncRef() 357 m.setKey(mp) 358 p.parentMount.pendingChildren = append(p.parentMount.pendingChildren, m) 359 if len(c.children) != 0 { 360 queue = append(queue, cloneTreeNode{c, m}) 361 } 362 } 363 } 364 return clone, nil 365 } 366 367 // BindAt creates a clone of the source path's parent mount and mounts it at 368 // the target path. The new mount's root dentry is one pointed to by the source 369 // path. 370 // 371 // TODO(b/249121230): Support recursive bind mounting. 372 func (vfs *VirtualFilesystem) BindAt(ctx context.Context, creds *auth.Credentials, source, target *PathOperation) (*Mount, error) { 373 sourceVd, err := vfs.GetDentryAt(ctx, creds, source, &GetDentryOptions{}) 374 if err != nil { 375 return nil, err 376 } 377 defer sourceVd.DecRef(ctx) 378 targetVd, err := vfs.GetDentryAt(ctx, creds, target, &GetDentryOptions{}) 379 if err != nil { 380 return nil, err 381 } 382 vfs.lockMounts() 383 defer vfs.unlockMounts(ctx) 384 // This is equivalent to checking for SB_NOUSER in Linux, which is set on all 385 // anon mounts. 386 if targetVd.mount.ns == nil { 387 vfs.delayDecRef(targetVd) 388 return nil, linuxerr.EINVAL 389 } 390 391 clone := vfs.cloneMount(sourceVd.mount, sourceVd.dentry, nil) 392 vfs.delayDecRef(clone) 393 tree := vfs.preparePropagationTree(clone, targetVd) 394 if uint32(1+len(tree))+targetVd.mount.ns.mounts > MountMax { 395 vfs.setPropagation(clone, linux.MS_PRIVATE) 396 vfs.abortPropagationTree(ctx, tree) 397 vfs.delayDecRef(targetVd) 398 return nil, linuxerr.ENOSPC 399 } 400 401 if err := vfs.connectMountAtLocked(ctx, clone, targetVd); err != nil { 402 vfs.setPropagation(clone, linux.MS_PRIVATE) 403 vfs.abortPropagationTree(ctx, tree) 404 return nil, err 405 } 406 vfs.commitPropagationTree(ctx, tree) 407 return clone, nil 408 } 409 410 // MountAt creates and mounts a Filesystem configured by the given arguments. 411 // The VirtualFilesystem will hold a reference to the Mount until it is 412 // unmounted. 413 // 414 // This method returns the mounted Mount without a reference, for convenience 415 // during VFS setup when there is no chance of racing with unmount. 416 func (vfs *VirtualFilesystem) MountAt(ctx context.Context, creds *auth.Credentials, source string, target *PathOperation, fsTypeName string, opts *MountOptions) (*Mount, error) { 417 mnt, err := vfs.MountDisconnected(ctx, creds, source, fsTypeName, opts) 418 if err != nil { 419 return nil, err 420 } 421 defer mnt.DecRef(ctx) 422 if err := vfs.ConnectMountAt(ctx, creds, mnt, target); err != nil { 423 return nil, err 424 } 425 return mnt, nil 426 } 427 428 // UmountAt removes the Mount at the given path. 429 func (vfs *VirtualFilesystem) UmountAt(ctx context.Context, creds *auth.Credentials, pop *PathOperation, opts *UmountOptions) error { 430 if opts.Flags&^(linux.MNT_FORCE|linux.MNT_DETACH) != 0 { 431 return linuxerr.EINVAL 432 } 433 434 // MNT_FORCE is currently unimplemented except for the permission check. 
	// Force unmounting specifically requires CAP_SYS_ADMIN in the root user
	// namespace, and not in the owner user namespace for the target mount. See
	// fs/namespace.c:SYSCALL_DEFINE2(umount, ...)
	if opts.Flags&linux.MNT_FORCE != 0 && !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, creds.UserNamespace.Root()) {
		return linuxerr.EPERM
	}

	vd, err := vfs.GetDentryAt(ctx, creds, pop, &GetDentryOptions{})
	if err != nil {
		return err
	}
	// This defer statement is encapsulated in a function because vd.mount can be
	// modified in the block below. The arguments to defer are evaluated during
	// the construction of a defer statement, so if vd.DecRef() was not
	// encapsulated, the vd structure and its underlying pointers _at this point_
	// would be copied and DecRefd at the end of this function.
	defer func() {
		vd.DecRef(ctx)
	}()
	// Linux passes the LOOKUP_MOUNTPOINT flag to user_path_at in ksys_umount to
	// resolve to the topmost mount in the stack located at the specified path.
	// vfs.GetMountAt() imitates this behavior. See fs/namei.c:user_path_at(...)
	// and fs/namespace.c:ksys_umount(...).
	if vd.dentry.isMounted() {
		if realmnt := vfs.getMountAt(ctx, vd.mount, vd.dentry); realmnt != nil {
			vd.mount.DecRef(ctx)
			vd.mount = realmnt
		}
	} else if vd.dentry != vd.mount.root {
		return linuxerr.EINVAL
	}

	vfs.lockMounts()
	defer vfs.unlockMounts(ctx)
	if mntns := MountNamespaceFromContext(ctx); mntns != nil {
		vfs.delayDecRef(mntns)
		if mntns != vd.mount.ns {
			return linuxerr.EINVAL
		}

		if vd.mount == vd.mount.ns.root {
			return linuxerr.EINVAL
		}
	}

	umountTree := []*Mount{vd.mount}
	parent, mountpoint := vd.mount.parent(), vd.mount.point()
	if parent != nil && parent.isShared {
		for peer := parent.sharedEntry.Next(); peer != parent; peer = peer.sharedEntry.Next() {
			umountMnt := vfs.mounts.Lookup(peer, mountpoint)
			// From https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt:
			// If any peer has some child mounts, then that mount is not unmounted,
			// but all other mounts are unmounted.
			if umountMnt == nil {
				continue
			}
			if len(umountMnt.children) == 0 || umountMnt.coveringMount() != nil {
				umountTree = append(umountTree, umountMnt)
			}
		}
	}

	// TODO(gvisor.dev/issue/1035): Linux special-cases umount of the caller's
	// root, which we don't implement yet (we'll just fail it since the caller
	// holds a reference on it).

	vfs.mounts.seq.BeginWrite()
	if opts.Flags&linux.MNT_DETACH == 0 {
		if len(vd.mount.children) != 0 {
			vfs.mounts.seq.EndWrite()
			return linuxerr.EBUSY
		}
		// We are holding a reference on vd.mount.
		expectedRefs := int64(1)
		if !vd.mount.umounted {
			expectedRefs = 2
		}
		if vd.mount.refs.Load()&^math.MinInt64 != expectedRefs { // mask out MSB
			vfs.mounts.seq.EndWrite()
			return linuxerr.EBUSY
		}
	}
	for _, mnt := range umountTree {
		vfs.umountRecursiveLocked(mnt, &umountRecursiveOptions{
			eager:               opts.Flags&linux.MNT_DETACH == 0,
			disconnectHierarchy: true,
		})
	}
	vfs.mounts.seq.EndWrite()
	return nil
}

// +stateify savable
type umountRecursiveOptions struct {
	// If eager is true, ensure that future calls to Mount.tryIncMountedRef()
	// on umounted mounts fail.
	//
	// eager is analogous to Linux's UMOUNT_SYNC.
533 eager bool 534 535 // If disconnectHierarchy is true, Mounts that are umounted hierarchically 536 // should be disconnected from their parents. (Mounts whose parents are not 537 // umounted, which in most cases means the Mount passed to the initial call 538 // to umountRecursiveLocked, are unconditionally disconnected for 539 // consistency with Linux.) 540 // 541 // disconnectHierarchy is analogous to Linux's !UMOUNT_CONNECTED. 542 disconnectHierarchy bool 543 } 544 545 // umountRecursiveLocked marks mnt and its descendants as umounted. 546 // 547 // umountRecursiveLocked is analogous to Linux's fs/namespace.c:umount_tree(). 548 // 549 // Preconditions: 550 // - vfs.mountMu must be locked. 551 // - vfs.mounts.seq must be in a writer critical section. 552 // 553 // +checklocks:vfs.mountMu 554 func (vfs *VirtualFilesystem) umountRecursiveLocked(mnt *Mount, opts *umountRecursiveOptions) { 555 // covered mounts are a special case where the grandchild mount is 556 // reconnected to the parent after the child is disconnected. 557 var cover *Mount 558 if parent := mnt.parent(); parent != nil && !parent.umounted { 559 if cover = mnt.coveringMount(); cover != nil { 560 vfs.delayDecRef(vfs.disconnectLocked(cover)) 561 cover.setKey(mnt.getKey()) 562 } 563 } 564 if !mnt.umounted { 565 mnt.umounted = true 566 vfs.delayDecRef(mnt) 567 if parent := mnt.parent(); parent != nil && (opts.disconnectHierarchy || !parent.umounted) { 568 vfs.delayDecRef(vfs.disconnectLocked(mnt)) 569 } 570 if mnt.isShared { 571 vfs.setPropagation(mnt, linux.MS_PRIVATE) 572 } 573 } 574 if opts.eager { 575 for { 576 refs := mnt.refs.Load() 577 if refs < 0 { 578 break 579 } 580 if mnt.refs.CompareAndSwap(refs, refs|math.MinInt64) { 581 break 582 } 583 } 584 } 585 for child := range mnt.children { 586 vfs.umountRecursiveLocked(child, opts) 587 } 588 if cover != nil { 589 mp := cover.getKey() 590 mp.IncRef() 591 mp.dentry.mu.Lock() 592 vfs.connectLocked(cover, mp, mp.mount.ns) 593 mp.dentry.mu.Unlock() 594 vfs.delayDecRef(cover) 595 } 596 } 597 598 // connectLocked makes vd the mount parent/point for mnt. It consumes 599 // references held by vd. 600 // 601 // Preconditions: 602 // - vfs.mountMu must be locked. 603 // - vfs.mounts.seq must be in a writer critical section. 604 // - d.mu must be locked. 605 // - mnt.parent() == nil, i.e. mnt must not already be connected. 606 func (vfs *VirtualFilesystem) connectLocked(mnt *Mount, vd VirtualDentry, mntns *MountNamespace) { 607 if checkInvariants { 608 if mnt.parent() != nil { 609 panic("VFS.connectLocked called on connected mount") 610 } 611 } 612 mnt.IncRef() // dropped by callers of umountRecursiveLocked 613 mnt.setKey(vd) 614 if vd.mount.children == nil { 615 vd.mount.children = make(map[*Mount]struct{}) 616 } 617 vd.mount.children[mnt] = struct{}{} 618 vd.dentry.mounts.Add(1) 619 mnt.ns = mntns 620 mntns.mountpoints[vd.dentry]++ 621 mntns.mounts++ 622 vfs.mounts.insertSeqed(mnt) 623 vfsmpmounts, ok := vfs.mountpoints[vd.dentry] 624 if !ok { 625 vfsmpmounts = make(map[*Mount]struct{}) 626 vfs.mountpoints[vd.dentry] = vfsmpmounts 627 } 628 vfsmpmounts[mnt] = struct{}{} 629 vfs.maybeResolveMountPromise(vd) 630 } 631 632 // disconnectLocked makes vd have no mount parent/point and returns its old 633 // mount parent/point with a reference held. 634 // 635 // Preconditions: 636 // - vfs.mountMu must be locked. 637 // - vfs.mounts.seq must be in a writer critical section. 638 // - mnt.parent() != nil. 
639 func (vfs *VirtualFilesystem) disconnectLocked(mnt *Mount) VirtualDentry { 640 vd := mnt.getKey() 641 if checkInvariants { 642 if vd.mount == nil { 643 panic("VFS.disconnectLocked called on disconnected mount") 644 } 645 if mnt.ns.mountpoints[vd.dentry] == 0 { 646 panic("VFS.disconnectLocked called on dentry with zero mountpoints.") 647 } 648 if mnt.ns.mounts == 0 { 649 panic("VFS.disconnectLocked called on namespace with zero mounts.") 650 } 651 } 652 delete(vd.mount.children, mnt) 653 vd.dentry.mounts.Add(math.MaxUint32) // -1 654 mnt.ns.mountpoints[vd.dentry]-- 655 mnt.ns.mounts-- 656 if mnt.ns.mountpoints[vd.dentry] == 0 { 657 delete(mnt.ns.mountpoints, vd.dentry) 658 } 659 vfs.mounts.removeSeqed(mnt) 660 mnt.loadKey(VirtualDentry{}) // Clear mnt.key. 661 vfsmpmounts := vfs.mountpoints[vd.dentry] 662 delete(vfsmpmounts, mnt) 663 if len(vfsmpmounts) == 0 { 664 delete(vfs.mountpoints, vd.dentry) 665 } 666 return vd 667 } 668 669 // tryIncMountedRef increments mnt's reference count and returns true. If mnt's 670 // reference count is already zero, or has been eagerly umounted, 671 // tryIncMountedRef does nothing and returns false. 672 // 673 // tryIncMountedRef does not require that a reference is held on mnt. 674 func (mnt *Mount) tryIncMountedRef() bool { 675 for { 676 r := mnt.refs.Load() 677 if r <= 0 { // r < 0 => MSB set => eagerly unmounted 678 return false 679 } 680 if mnt.refs.CompareAndSwap(r, r+1) { 681 if mnt.LogRefs() { 682 refs.LogTryIncRef(mnt, r+1) 683 } 684 return true 685 } 686 } 687 } 688 689 // IncRef increments mnt's reference count. 690 func (mnt *Mount) IncRef() { 691 // In general, negative values for mnt.refs are valid because the MSB is 692 // the eager-unmount bit. 693 r := mnt.refs.Add(1) 694 if mnt.LogRefs() { 695 refs.LogIncRef(mnt, r) 696 } 697 } 698 699 // DecRef decrements mnt's reference count. 700 func (mnt *Mount) DecRef(ctx context.Context) { 701 r := mnt.refs.Add(-1) 702 if mnt.LogRefs() { 703 refs.LogDecRef(mnt, r) 704 } 705 if r&^math.MinInt64 == 0 { // mask out MSB 706 refs.Unregister(mnt) 707 mnt.destroy(ctx) 708 } 709 } 710 711 func (mnt *Mount) destroy(ctx context.Context) { 712 if mnt.parent() != nil { 713 mnt.vfs.lockMounts() 714 mnt.vfs.mounts.seq.BeginWrite() 715 vd := mnt.vfs.disconnectLocked(mnt) 716 if vd.Ok() { 717 mnt.vfs.delayDecRef(vd) 718 } 719 mnt.vfs.mounts.seq.EndWrite() 720 mnt.vfs.unlockMounts(ctx) 721 } 722 if mnt.root != nil { 723 mnt.root.DecRef(ctx) 724 } 725 mnt.fs.DecRef(ctx) 726 } 727 728 // RefType implements refs.CheckedObject.Type. 729 func (mnt *Mount) RefType() string { 730 return "vfs.Mount" 731 } 732 733 // LeakMessage implements refs.CheckedObject.LeakMessage. 734 func (mnt *Mount) LeakMessage() string { 735 return fmt.Sprintf("[vfs.Mount %p] reference count of %d instead of 0", mnt, mnt.refs.Load()) 736 } 737 738 // LogRefs implements refs.CheckedObject.LogRefs. 739 // 740 // This should only be set to true for debugging purposes, as it can generate an 741 // extremely large amount of output and drastically degrade performance. 742 func (mnt *Mount) LogRefs() bool { 743 return false 744 } 745 746 // getMountAt returns the last Mount in the stack mounted at (mnt, d). It takes 747 // a reference on the returned Mount. If (mnt, d) is not a mount point, 748 // getMountAt returns nil. 749 // 750 // getMountAt is analogous to Linux's fs/namei.c:follow_mount(). 751 // 752 // Preconditions: References are held on mnt and d. 
753 func (vfs *VirtualFilesystem) getMountAt(ctx context.Context, mnt *Mount, d *Dentry) *Mount { 754 // The first mount is special-cased: 755 // 756 // - The caller is assumed to have checked d.isMounted() already. (This 757 // isn't a precondition because it doesn't matter for correctness.) 758 // 759 // - We return nil, instead of mnt, if there is no mount at (mnt, d). 760 // 761 // - We don't drop the caller's references on mnt and d. 762 retryFirst: 763 next := vfs.mounts.Lookup(mnt, d) 764 if next == nil { 765 return nil 766 } 767 if !next.tryIncMountedRef() { 768 // Raced with umount. 769 goto retryFirst 770 } 771 mnt = next 772 d = next.root 773 // We don't need to take Dentry refs anywhere in this function because 774 // Mounts hold references on Mount.root, which is immutable. 775 for d.isMounted() { 776 next := vfs.mounts.Lookup(mnt, d) 777 if next == nil { 778 break 779 } 780 if !next.tryIncMountedRef() { 781 // Raced with umount. 782 continue 783 } 784 mnt.DecRef(ctx) 785 mnt = next 786 d = next.root 787 } 788 return mnt 789 } 790 791 // getMountpointAt returns the mount point for the stack of Mounts including 792 // mnt. It takes a reference on the returned VirtualDentry. If no such mount 793 // point exists (i.e. mnt is a root mount), getMountpointAt returns (nil, nil). 794 // 795 // Preconditions: 796 // - References are held on mnt and root. 797 // - vfsroot is not (mnt, mnt.root). 798 func (vfs *VirtualFilesystem) getMountpointAt(ctx context.Context, mnt *Mount, vfsroot VirtualDentry) VirtualDentry { 799 // The first mount is special-cased: 800 // 801 // - The caller must have already checked mnt against vfsroot. 802 // 803 // - We return nil, instead of mnt, if there is no mount point for mnt. 804 // 805 // - We don't drop the caller's reference on mnt. 806 retryFirst: 807 epoch := vfs.mounts.seq.BeginRead() 808 parent, point := mnt.parent(), mnt.point() 809 if !vfs.mounts.seq.ReadOk(epoch) { 810 goto retryFirst 811 } 812 if parent == nil { 813 return VirtualDentry{} 814 } 815 if !parent.tryIncMountedRef() { 816 // Raced with umount. 817 goto retryFirst 818 } 819 if !point.TryIncRef() { 820 // Since Mount holds a reference on Mount.key.point, this can only 821 // happen due to a racing change to Mount.key. 822 parent.DecRef(ctx) 823 goto retryFirst 824 } 825 if !vfs.mounts.seq.ReadOk(epoch) { 826 point.DecRef(ctx) 827 parent.DecRef(ctx) 828 goto retryFirst 829 } 830 mnt = parent 831 d := point 832 for { 833 if mnt == vfsroot.mount && d == vfsroot.dentry { 834 break 835 } 836 if d != mnt.root { 837 break 838 } 839 retryNotFirst: 840 epoch := vfs.mounts.seq.BeginRead() 841 parent, point := mnt.parent(), mnt.point() 842 if !vfs.mounts.seq.ReadOk(epoch) { 843 goto retryNotFirst 844 } 845 if parent == nil { 846 break 847 } 848 if !parent.tryIncMountedRef() { 849 // Raced with umount. 850 goto retryNotFirst 851 } 852 if !point.TryIncRef() { 853 // Since Mount holds a reference on Mount.key.point, this can 854 // only happen due to a racing change to Mount.key. 855 parent.DecRef(ctx) 856 goto retryNotFirst 857 } 858 if !vfs.mounts.seq.ReadOk(epoch) { 859 point.DecRef(ctx) 860 parent.DecRef(ctx) 861 goto retryNotFirst 862 } 863 d.DecRef(ctx) 864 mnt.DecRef(ctx) 865 mnt = parent 866 d = point 867 } 868 return VirtualDentry{mnt, d} 869 } 870 871 // PivotRoot makes location pointed to by newRootPop the root of the current 872 // namespace, and moves the current root to the location pointed to by 873 // putOldPop. 
874 func (vfs *VirtualFilesystem) PivotRoot(ctx context.Context, creds *auth.Credentials, newRootPop *PathOperation, putOldPop *PathOperation) error { 875 newRootVd, err := vfs.GetDentryAt(ctx, creds, newRootPop, &GetDentryOptions{CheckSearchable: true}) 876 if err != nil { 877 return err 878 } 879 defer newRootVd.DecRef(ctx) 880 putOldVd, err := vfs.GetDentryAt(ctx, creds, putOldPop, &GetDentryOptions{CheckSearchable: true}) 881 if err != nil { 882 return err 883 } 884 defer putOldVd.DecRef(ctx) 885 rootVd := RootFromContext(ctx) 886 defer rootVd.DecRef(ctx) 887 888 retry: 889 epoch := vfs.mounts.seq.BeginRead() 890 // Neither new_root nor put_old can be on the same mount as the current 891 //root mount. 892 if newRootVd.mount == rootVd.mount || putOldVd.mount == rootVd.mount { 893 return linuxerr.EBUSY 894 } 895 // new_root must be a mountpoint. 896 if newRootVd.mount.root != newRootVd.dentry { 897 return linuxerr.EINVAL 898 } 899 // put_old must be at or underneath new_root. 900 path, err := vfs.PathnameReachable(ctx, newRootVd, putOldVd) 901 if err != nil || len(path) == 0 { 902 return linuxerr.EINVAL 903 } 904 // The current root directory must be a mountpoint 905 // (in the case it has been chrooted). 906 if rootVd.mount.root != rootVd.dentry { 907 return linuxerr.EINVAL 908 } 909 // The current root and the new root cannot be on the rootfs mount. 910 if rootVd.mount.parent() == nil || newRootVd.mount.parent() == nil { 911 return linuxerr.EINVAL 912 } 913 // The current root and the new root must be in the context's mount namespace. 914 ns := MountNamespaceFromContext(ctx) 915 defer ns.DecRef(ctx) 916 vfs.lockMounts() 917 if rootVd.mount.ns != ns || newRootVd.mount.ns != ns { 918 vfs.unlockMounts(ctx) 919 return linuxerr.EINVAL 920 } 921 922 // Either the mount point at new_root, or the parent mount of that mount 923 // point, has propagation type MS_SHARED. 924 if newRootParent := newRootVd.mount.parent(); newRootVd.mount.isShared || newRootParent.isShared { 925 vfs.unlockMounts(ctx) 926 return linuxerr.EINVAL 927 } 928 // put_old is a mount point and has the propagation type MS_SHARED. 929 if putOldVd.mount.root == putOldVd.dentry && putOldVd.mount.isShared { 930 vfs.unlockMounts(ctx) 931 return linuxerr.EINVAL 932 } 933 934 if !vfs.mounts.seq.BeginWriteOk(epoch) { 935 // Checks above raced with a mount change. 936 vfs.unlockMounts(ctx) 937 goto retry 938 } 939 defer vfs.unlockMounts(ctx) 940 mp := vfs.disconnectLocked(newRootVd.mount) 941 vfs.delayDecRef(mp) 942 rootMp := vfs.disconnectLocked(rootVd.mount) 943 944 putOldVd.IncRef() 945 putOldVd.dentry.mu.Lock() 946 vfs.connectLocked(rootVd.mount, putOldVd, ns) 947 putOldVd.dentry.mu.Unlock() 948 949 rootMp.dentry.mu.Lock() 950 vfs.connectLocked(newRootVd.mount, rootMp, ns) 951 rootMp.dentry.mu.Unlock() 952 vfs.mounts.seq.EndWrite() 953 954 vfs.delayDecRef(newRootVd.mount) 955 vfs.delayDecRef(rootVd.mount) 956 return nil 957 } 958 959 // SetMountReadOnly sets the mount as ReadOnly. 960 func (vfs *VirtualFilesystem) SetMountReadOnly(mnt *Mount, ro bool) error { 961 vfs.lockMounts() 962 defer vfs.unlockMounts(context.Background()) 963 return mnt.setReadOnlyLocked(ro) 964 } 965 966 // CheckBeginWrite increments the counter of in-progress write operations on 967 // mnt. If mnt is mounted MS_RDONLY, CheckBeginWrite does nothing and returns 968 // EROFS. 969 // 970 // If CheckBeginWrite succeeds, EndWrite must be called when the write 971 // operation is finished. 
func (mnt *Mount) CheckBeginWrite() error {
	if mnt.writers.Add(1) < 0 {
		mnt.writers.Add(-1)
		return linuxerr.EROFS
	}
	return nil
}

// EndWrite indicates that a write operation signaled by a previous successful
// call to CheckBeginWrite has finished.
func (mnt *Mount) EndWrite() {
	mnt.writers.Add(-1)
}

// Preconditions: VirtualFilesystem.mountMu must be locked.
func (mnt *Mount) setReadOnlyLocked(ro bool) error {
	if oldRO := mnt.writers.Load() < 0; oldRO == ro {
		return nil
	}
	if ro {
		if !mnt.writers.CompareAndSwap(0, math.MinInt64) {
			return linuxerr.EBUSY
		}
		return nil
	}
	// Unset MSB without dropping any temporary increments from failed calls to
	// mnt.CheckBeginWrite().
	mnt.writers.Add(math.MinInt64)
	return nil
}

// ReadOnly returns true if the mount is read-only.
func (mnt *Mount) ReadOnly() bool {
	return mnt.writers.Load() < 0
}

// Filesystem returns the mounted Filesystem. It does not take a reference on
// the returned Filesystem.
func (mnt *Mount) Filesystem() *Filesystem {
	return mnt.fs
}

// submountsLocked returns this Mount and all Mounts that are descendants of
// it.
//
// Precondition: mnt.vfs.mountMu must be held.
func (mnt *Mount) submountsLocked() []*Mount {
	mounts := []*Mount{mnt}
	for m := range mnt.children {
		mounts = append(mounts, m.submountsLocked()...)
	}
	return mounts
}

// Root returns the mount's root. It does not take a reference on the returned
// Dentry.
func (mnt *Mount) Root() *Dentry {
	return mnt.root
}

// GenerateProcMounts emits the contents of /proc/[pid]/mounts for vfs to buf.
//
// Preconditions: taskRootDir.Ok().
func (vfs *VirtualFilesystem) GenerateProcMounts(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
	rootMnt := taskRootDir.mount

	vfs.lockMounts()
	mounts := rootMnt.submountsLocked()
	// Take a reference on mounts since we need to drop vfs.mountMu before
	// calling vfs.PathnameReachable() (=> FilesystemImpl.PrependPath()).
	for _, mnt := range mounts {
		mnt.IncRef()
	}
	vfs.unlockMounts(ctx)
	defer func() {
		for _, mnt := range mounts {
			mnt.DecRef(ctx)
		}
	}()
	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })

	for _, mnt := range mounts {
		// Get the path to this mount relative to task root.
		mntRootVD := VirtualDentry{
			mount:  mnt,
			dentry: mnt.root,
		}
		path, err := vfs.PathnameReachable(ctx, taskRootDir, mntRootVD)
		if err != nil {
			// For some reason we didn't get a path. Log a warning
			// and run with empty path.
			ctx.Warningf("VFS.GenerateProcMounts: error getting pathname for mount root %+v: %v", mnt.root, err)
			path = ""
		}
		if path == "" {
			// Either an error occurred, or path is not reachable
			// from root.
			break
		}

		opts := "rw"
		if mnt.ReadOnly() {
			opts = "ro"
		}
		if mnt.Flags.NoATime {
			opts += ",noatime"
		}
		if mnt.Flags.NoExec {
			opts += ",noexec"
		}
		if mopts := mnt.fs.Impl().MountOptions(); mopts != "" {
			opts += "," + mopts
		}

		// Format:
		// <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
		//
		// The "needs dump" and "fsck order" flags are always 0, which
		// is allowed.
		fmt.Fprintf(buf, "%s %s %s %s %d %d\n", "none", path, mnt.fs.FilesystemType().Name(), opts, 0, 0)
	}
}

// GenerateProcMountInfo emits the contents of /proc/[pid]/mountinfo for vfs to
// buf.
//
// Preconditions: taskRootDir.Ok().
func (vfs *VirtualFilesystem) GenerateProcMountInfo(ctx context.Context, taskRootDir VirtualDentry, buf *bytes.Buffer) {
	rootMnt := taskRootDir.mount

	vfs.lockMounts()
	mounts := rootMnt.submountsLocked()
	// Take a reference on mounts since we need to drop vfs.mountMu before
	// calling vfs.PathnameReachable() (=> FilesystemImpl.PrependPath()) or
	// vfs.StatAt() (=> FilesystemImpl.StatAt()).
	for _, mnt := range mounts {
		mnt.IncRef()
	}
	vfs.unlockMounts(ctx)
	defer func() {
		for _, mnt := range mounts {
			mnt.DecRef(ctx)
		}
	}()
	sort.Slice(mounts, func(i, j int) bool { return mounts[i].ID < mounts[j].ID })

	creds := auth.CredentialsFromContext(ctx)
	for _, mnt := range mounts {
		// Get the path to this mount relative to task root.
		mntRootVD := VirtualDentry{
			mount:  mnt,
			dentry: mnt.root,
		}
		pathFromRoot, err := vfs.PathnameReachable(ctx, taskRootDir, mntRootVD)
		if err != nil {
			// For some reason we didn't get a path. Log a warning
			// and skip this mount.
			ctx.Warningf("VFS.GenerateProcMountInfo: error getting pathname for mount root %+v: %v", mnt.root, err)
			continue
		}
		if pathFromRoot == "" {
			// The path is not reachable from root.
			continue
		}
		var pathFromFS string
		pathFromFS, err = vfs.PathnameInFilesystem(ctx, mntRootVD)
		if err != nil {
			// For some reason we didn't get a path. Log a warning
			// and skip this mount.
			ctx.Warningf("VFS.GenerateProcMountInfo: error getting pathname for mount root %+v: %v", mnt.root, err)
			continue
		}
		if pathFromFS == "" {
			// The mount root's path within its own filesystem is empty.
			continue
		}
		// Stat the mount root to get the major/minor device numbers.
		pop := &PathOperation{
			Root:  mntRootVD,
			Start: mntRootVD,
		}
		statx, err := vfs.StatAt(ctx, creds, pop, &StatOptions{})
		if err != nil {
			// Well that's not good. Ignore this mount.
			ctx.Warningf("VFS.GenerateProcMountInfo: failed to stat mount root %+v: %v", mnt.root, err)
			continue
		}

		// Format:
		// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
		// (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)    (10)         (11)

		// (1) Mount ID.
		fmt.Fprintf(buf, "%d ", mnt.ID)

		// (2) Parent ID (or this ID if there is no parent).
		// Note that even if the call to mnt.parent() races with Mount
		// destruction (which is possible since we're not holding vfs.mountMu),
		// its Mount.ID will still be valid.
		pID := mnt.ID
		if p := mnt.parent(); p != nil {
			pID = p.ID
		}
		fmt.Fprintf(buf, "%d ", pID)

		// (3) Major:Minor device ID. We don't have a superblock, so we
		// just use the root inode device number.
		fmt.Fprintf(buf, "%d:%d ", statx.DevMajor, statx.DevMinor)

		// (4) Root: the pathname of the directory in the filesystem
		// which forms the root of this mount.
		fmt.Fprintf(buf, "%s ", manglePath(pathFromFS))

		// (5) Mount point (relative to process root).
		fmt.Fprintf(buf, "%s ", manglePath(pathFromRoot))

		// (6) Mount options.
		opts := "rw"
		if mnt.ReadOnly() {
			opts = "ro"
		}
		if mnt.Flags.NoATime {
			opts += ",noatime"
		}
		if mnt.Flags.NoExec {
			opts += ",noexec"
		}
		fmt.Fprintf(buf, "%s ", opts)

		// (7) Optional fields: zero or more fields of the form "tag[:value]".
		fmt.Fprintf(buf, "%s ", mnt.generateOptionalTags())
		// (8) Separator: the end of the optional fields is marked by a single hyphen.
		fmt.Fprintf(buf, "- ")

		// (9) Filesystem type.
		fmt.Fprintf(buf, "%s ", mnt.fs.FilesystemType().Name())

		// (10) Mount source: filesystem-specific information or "none".
		fmt.Fprintf(buf, "none ")

		// (11) Superblock options, and final newline.
		fmt.Fprintf(buf, "%s\n", superBlockOpts(pathFromRoot, mnt))
	}
}

// manglePath replaces ' ', '\t', '\n', and '\\' with their octal equivalents.
// See Linux fs/seq_file.c:mangle_path.
func manglePath(p string) string {
	r := strings.NewReplacer(" ", "\\040", "\t", "\\011", "\n", "\\012", "\\", "\\134")
	return r.Replace(p)
}

// superBlockOpts returns the super block options string for the mount at
// the given path.
func superBlockOpts(mountPath string, mnt *Mount) string {
	// Compose super block options by combining global mount flags with
	// FS-specific mount options.
	opts := "rw"
	if mnt.ReadOnly() {
		opts = "ro"
	}

	if mopts := mnt.fs.Impl().MountOptions(); mopts != "" {
		opts += "," + mopts
	}

	// NOTE(b/147673608): If the mount is a ramdisk-based fake cgroupfs, we also
	// need to include the cgroup name in the options. For now we just read that
	// from the path. Note that this is only possible when "cgroup" isn't
	// registered as a valid filesystem type.
	//
	// TODO(gvisor.dev/issue/190): Once we remove fake cgroupfs support, we
	// should remove this.
	if cgroupfs := mnt.vfs.getFilesystemType("cgroup"); cgroupfs != nil && cgroupfs.opts.AllowUserMount {
		// Real cgroupfs available.
		return opts
	}
	if mnt.fs.FilesystemType().Name() == "cgroup" {
		splitPath := strings.Split(mountPath, "/")
		cgroupType := splitPath[len(splitPath)-1]
		opts += "," + cgroupType
	}

	return opts
}

// allocateGroupID returns a new mount group ID if one is available, and an
// error otherwise. If the group ID bitmap is full, the bitmap is doubled in
// size before the new group ID is allocated.
//
// +checklocks:vfs.mountMu
func (vfs *VirtualFilesystem) allocateGroupID() (uint32, error) {
	groupID, err := vfs.groupIDBitmap.FirstZero(1)
	if err != nil {
		if err := vfs.groupIDBitmap.Grow(uint32(vfs.groupIDBitmap.Size())); err != nil {
			return 0, err
		}
		// Retry the search now that the bitmap has grown; the value returned
		// by the failed FirstZero call above is not a valid ID.
		groupID, err = vfs.groupIDBitmap.FirstZero(1)
		if err != nil {
			return 0, err
		}
	}
	vfs.groupIDBitmap.Add(groupID)
	return groupID, nil
}

// freeGroupID marks a groupID as available for reuse.
//
// +checklocks:vfs.mountMu
func (vfs *VirtualFilesystem) freeGroupID(id uint32) {
	vfs.groupIDBitmap.Remove(id)
}
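
// The sketch below is illustrative only and is not part of the upstream
// gVisor source. It shows how the public APIs defined in this file are
// typically combined during sentry setup: MountAt creates a filesystem and
// attaches the new Mount, and UmountAt detaches it again. It assumes that ctx
// carries a mount namespace and root directory (as task contexts do), that
// creds are the mounting task's credentials, and that a "tmpfs" filesystem
// type has been registered with AllowUserMount set.
func exampleMountAndUmountTmpfs(ctx context.Context, creds *auth.Credentials, vfsObj *VirtualFilesystem) error {
	root := RootFromContext(ctx)
	defer root.DecRef(ctx)
	// Mount at the context's root. MountAt leaves the VFS holding the
	// long-lived reference on the new Mount, so the returned *Mount is only
	// borrowed here and needs no DecRef.
	target := &PathOperation{Root: root, Start: root}
	if _, err := vfsObj.MountAt(ctx, creds, "none" /* source */, target, "tmpfs", &MountOptions{ReadOnly: true}); err != nil {
		return err
	}
	// Detach it again. MNT_DETACH requests a lazy unmount, so UmountAt skips
	// the checks for extra references and child mounts.
	return vfsObj.UmountAt(ctx, creds, target, &UmountOptions{Flags: linux.MNT_DETACH})
}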
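
// The helper below is likewise an illustrative addition rather than original
// gVisor code. It demonstrates the encoding shared by Mount.refs and
// Mount.writers: the most significant bit of the int64 doubles as a flag
// (eagerly-unmounted for refs, MS_RDONLY for writers) while the low 63 bits
// hold the count, which is why the code above masks with `&^ math.MinInt64`
// and treats negative values as "flag set".
func exampleRefsEncoding() {
	var v int64 = 2             // count of 2, flag clear
	v |= math.MinInt64          // set the flag (the MSB)
	count := v &^ math.MinInt64 // mask out the MSB to recover the count
	flagSet := v < 0            // MSB set => value is negative
	fmt.Printf("count=%d flagSet=%t\n", count, flagSet) // count=2 flagSet=true
}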