github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/vfs/namespace.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package vfs 16 17 import ( 18 "github.com/metacubex/gvisor/pkg/context" 19 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 20 "github.com/metacubex/gvisor/pkg/refs" 21 "github.com/metacubex/gvisor/pkg/sentry/kernel/auth" 22 ) 23 24 // A MountNamespace is a collection of Mounts.// 25 // MountNamespaces are reference-counted. Unless otherwise specified, all 26 // MountNamespace methods require that a reference is held. 27 // 28 // MountNamespace is analogous to Linux's struct mnt_namespace. 29 // 30 // +stateify savable 31 type MountNamespace struct { 32 // Refs is the reference count for this mount namespace. 33 Refs refs.TryRefCounter 34 35 // Owner is the usernamespace that owns this mount namespace. 36 Owner *auth.UserNamespace 37 38 // root is the MountNamespace's root mount. 39 root *Mount 40 41 // mountpoints maps all Dentries which are mount points in this namespace 42 // to the number of Mounts for which they are mount points. mountpoints is 43 // protected by VirtualFilesystem.mountMu. 44 // 45 // mountpoints is used to determine if a Dentry can be moved or removed 46 // (which requires that the Dentry is not a mount point in the calling 47 // namespace). 48 // 49 // mountpoints is maintained even if there are no references held on the 50 // MountNamespace; this is required to ensure that 51 // VFS.PrepareDeleteDentry() and VFS.PrepareRemoveDentry() operate 52 // correctly on unreferenced MountNamespaces. 53 mountpoints map[*Dentry]uint32 54 55 // mounts is the total number of mounts in this mount namespace. 56 mounts uint32 57 58 // pending is the total number of pending mounts in this mount namespace. 59 pending uint32 60 } 61 62 // Namespace is the namespace interface. 63 type Namespace interface { 64 Type() string 65 Destroy(ctx context.Context) 66 } 67 68 // NewMountNamespace returns a new mount namespace with a root filesystem 69 // configured by the given arguments. A reference is taken on the returned 70 // MountNamespace. 71 // 72 // If nsfs is nil, the default reference counter is used. 73 func (vfs *VirtualFilesystem) NewMountNamespace( 74 ctx context.Context, 75 creds *auth.Credentials, 76 source, fsTypeName string, 77 opts *MountOptions, 78 nsfs NamespaceInodeGetter, 79 ) (*MountNamespace, error) { 80 rft := vfs.getFilesystemType(fsTypeName) 81 if rft == nil { 82 ctx.Warningf("Unknown filesystem type: %s", fsTypeName) 83 return nil, linuxerr.ENODEV 84 } 85 fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions) 86 if err != nil { 87 return nil, err 88 } 89 return vfs.NewMountNamespaceFrom(ctx, creds, fs, root, opts, nsfs), nil 90 } 91 92 type namespaceDefaultRefs struct { 93 namespaceRefs 94 destroy func(ctx context.Context) 95 } 96 97 func (r *namespaceDefaultRefs) DecRef(ctx context.Context) { 98 r.namespaceRefs.DecRef( 99 func() { 100 r.destroy(ctx) 101 }, 102 ) 103 } 104 105 // NewMountNamespaceFrom constructs a new mount namespace from an existing 106 // filesystem and its root dentry. This is similar to NewMountNamespace, but 107 // uses an existing filesystem instead of constructing a new one. 108 func (vfs *VirtualFilesystem) NewMountNamespaceFrom( 109 ctx context.Context, 110 creds *auth.Credentials, 111 fs *Filesystem, 112 root *Dentry, 113 opts *MountOptions, 114 nsfs NamespaceInodeGetter, 115 ) *MountNamespace { 116 mntns := &MountNamespace{ 117 Owner: creds.UserNamespace, 118 mountpoints: make(map[*Dentry]uint32), 119 } 120 if nsfs == nil { 121 refs := &namespaceDefaultRefs{destroy: mntns.Destroy} 122 refs.InitRefs() 123 mntns.Refs = refs 124 } else { 125 mntns.Refs = nsfs.GetNamespaceInode(ctx, mntns) 126 } 127 mntns.root = newMount(vfs, fs, root, mntns, opts) 128 return mntns 129 } 130 131 type cloneEntry struct { 132 prevMount *Mount 133 parentMount *Mount 134 } 135 136 // +checklocks:vfs.mountMu 137 func (vfs *VirtualFilesystem) updateRootAndCWD(ctx context.Context, root *VirtualDentry, cwd *VirtualDentry, src *Mount, dst *Mount) { 138 if root.mount == src { 139 vfs.delayDecRef(root.mount) 140 root.mount = dst 141 root.mount.IncRef() 142 } 143 if cwd.mount == src { 144 vfs.delayDecRef(cwd.mount) 145 cwd.mount = dst 146 cwd.mount.IncRef() 147 } 148 } 149 150 // NamespaceInodeGetter is an interface that provides the GetNamespaceInode method. 151 type NamespaceInodeGetter interface { 152 GetNamespaceInode(ctx context.Context, ns Namespace) refs.TryRefCounter 153 } 154 155 // CloneMountNamespace makes a copy of the specified mount namespace. 156 // 157 // If `root` or `cwd` have mounts in the old namespace, they will be replaced 158 // with proper mounts from the new namespace. 159 func (vfs *VirtualFilesystem) CloneMountNamespace( 160 ctx context.Context, 161 creds *auth.Credentials, 162 ns *MountNamespace, 163 root *VirtualDentry, 164 cwd *VirtualDentry, 165 nsfs NamespaceInodeGetter, 166 ) (*MountNamespace, error) { 167 newns := &MountNamespace{ 168 Owner: creds.UserNamespace, 169 mountpoints: make(map[*Dentry]uint32), 170 } 171 172 newns.Refs = nsfs.GetNamespaceInode(ctx, newns) 173 vfs.lockMounts() 174 defer vfs.unlockMounts(ctx) 175 176 cloneType := 0 177 if ns.Owner != newns.Owner { 178 cloneType = sharedToFollowerClone 179 } 180 newRoot, err := vfs.cloneMountTree(ctx, ns.root, ns.root.root, cloneType, 181 func(ctx context.Context, src, dst *Mount) { 182 vfs.updateRootAndCWD(ctx, root, cwd, src, dst) // +checklocksforce: vfs.mountMu is locked. 183 }) 184 if err != nil { 185 newns.DecRef(ctx) 186 return nil, err 187 } 188 newns.root = newRoot 189 newns.root.ns = newns 190 vfs.commitChildren(ctx, newRoot) 191 if ns.Owner != newns.Owner { 192 vfs.lockMountTree(newRoot) 193 } 194 return newns, nil 195 } 196 197 // Destroy implements nsfs.Namespace.Destroy. 198 func (mntns *MountNamespace) Destroy(ctx context.Context) { 199 vfs := mntns.root.fs.VirtualFilesystem() 200 vfs.lockMounts() 201 vfs.umountTreeLocked(mntns.root, &umountRecursiveOptions{ 202 disconnectHierarchy: true, 203 }) 204 vfs.unlockMounts(ctx) 205 } 206 207 // Type implements nsfs.Namespace.Type. 208 func (mntns *MountNamespace) Type() string { 209 return "mnt" 210 } 211 212 // IncRef increments mntns' refcount. 213 func (mntns *MountNamespace) IncRef() { 214 mntns.Refs.IncRef() 215 } 216 217 // DecRef decrements mntns' reference count. 218 func (mntns *MountNamespace) DecRef(ctx context.Context) { 219 mntns.Refs.DecRef(ctx) 220 } 221 222 // TryIncRef attempts to increment mntns' reference count. 223 func (mntns *MountNamespace) TryIncRef() bool { 224 return mntns.Refs.TryIncRef() 225 } 226 227 // Root returns mntns' root. If the root is over-mounted, it returns the top 228 // mount. 229 func (mntns *MountNamespace) Root(ctx context.Context) VirtualDentry { 230 vfs := mntns.root.fs.VirtualFilesystem() 231 vd := VirtualDentry{ 232 mount: mntns.root, 233 dentry: mntns.root.root, 234 } 235 vd.IncRef() 236 if !vd.dentry.isMounted() { 237 return vd 238 } 239 m := vfs.getMountAt(ctx, vd.mount, vd.dentry) 240 if m == nil { 241 return vd 242 } 243 vd.DecRef(ctx) 244 vd.mount = m 245 vd.dentry = m.root 246 vd.dentry.IncRef() 247 return vd 248 } 249 250 func (mntns *MountNamespace) checkMountCount(ctx context.Context, mnt *Mount) error { 251 if mntns.mounts > MountMax { 252 return linuxerr.ENOSPC 253 } 254 if mntns.mounts+mntns.pending > MountMax { 255 return linuxerr.ENOSPC 256 } 257 mnts := mnt.countSubmountsLocked() 258 if mntns.mounts+mntns.pending+mnts > MountMax { 259 return linuxerr.ENOSPC 260 } 261 mntns.pending += mnts 262 return nil 263 }