github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/vfs/namespace.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package vfs
    16  
    17  import (
    18  	"github.com/metacubex/gvisor/pkg/context"
    19  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    20  	"github.com/metacubex/gvisor/pkg/refs"
    21  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    22  )
    23  
// A MountNamespace is a collection of Mounts.
//
// MountNamespaces are reference-counted. Unless otherwise specified, all
// MountNamespace methods require that a reference is held.
//
// MountNamespace is analogous to Linux's struct mnt_namespace.
//
// +stateify savable
type MountNamespace struct {
	// Refs is the reference count for this mount namespace. The IncRef,
	// DecRef and TryIncRef methods of MountNamespace delegate to it.
	Refs refs.TryRefCounter

	// Owner is the user namespace that owns this mount namespace. It is
	// taken from the credentials supplied when the namespace is created.
	Owner *auth.UserNamespace

	// root is the MountNamespace's root mount.
	root *Mount

	// mountpoints maps all Dentries which are mount points in this namespace
	// to the number of Mounts for which they are mount points. mountpoints is
	// protected by VirtualFilesystem.mountMu.
	//
	// mountpoints is used to determine if a Dentry can be moved or removed
	// (which requires that the Dentry is not a mount point in the calling
	// namespace).
	//
	// mountpoints is maintained even if there are no references held on the
	// MountNamespace; this is required to ensure that
	// VFS.PrepareDeleteDentry() and VFS.PrepareRemoveDentry() operate
	// correctly on unreferenced MountNamespaces.
	mountpoints map[*Dentry]uint32

	// mounts is the total number of mounts in this mount namespace. It is
	// compared against MountMax by checkMountCount.
	mounts uint32

	// pending is the total number of pending mounts in this mount namespace.
	// checkMountCount adds to it when it accepts a new mount tree.
	pending uint32
}
    61  
// Namespace is the namespace interface. It is the type of namespace object
// handed to NamespaceInodeGetter.GetNamespaceInode; MountNamespace implements
// it.
type Namespace interface {
	// Type returns a short string naming the kind of namespace. For a
	// MountNamespace this is "mnt".
	Type() string

	// Destroy tears the namespace down.
	// NOTE(review): presumably invoked once the last reference is dropped --
	// confirm against the reference counter implementations.
	Destroy(ctx context.Context)
}
    67  
    68  // NewMountNamespace returns a new mount namespace with a root filesystem
    69  // configured by the given arguments. A reference is taken on the returned
    70  // MountNamespace.
    71  //
    72  // If nsfs is nil, the default reference counter is used.
    73  func (vfs *VirtualFilesystem) NewMountNamespace(
    74  	ctx context.Context,
    75  	creds *auth.Credentials,
    76  	source, fsTypeName string,
    77  	opts *MountOptions,
    78  	nsfs NamespaceInodeGetter,
    79  ) (*MountNamespace, error) {
    80  	rft := vfs.getFilesystemType(fsTypeName)
    81  	if rft == nil {
    82  		ctx.Warningf("Unknown filesystem type: %s", fsTypeName)
    83  		return nil, linuxerr.ENODEV
    84  	}
    85  	fs, root, err := rft.fsType.GetFilesystem(ctx, vfs, creds, source, opts.GetFilesystemOptions)
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  	return vfs.NewMountNamespaceFrom(ctx, creds, fs, root, opts, nsfs), nil
    90  }
    91  
// namespaceDefaultRefs is the reference counter that NewMountNamespaceFrom
// installs when it is called without a NamespaceInodeGetter. It couples the
// embedded namespaceRefs with the callback that its DecRef method forwards to
// namespaceRefs.DecRef.
type namespaceDefaultRefs struct {
	namespaceRefs
	// destroy is invoked, with the context given to DecRef, from the callback
	// passed to namespaceRefs.DecRef. NewMountNamespaceFrom sets it to the
	// owning MountNamespace's Destroy method.
	destroy func(ctx context.Context)
}
    96  
    97  func (r *namespaceDefaultRefs) DecRef(ctx context.Context) {
    98  	r.namespaceRefs.DecRef(
    99  		func() {
   100  			r.destroy(ctx)
   101  		},
   102  	)
   103  }
   104  
   105  // NewMountNamespaceFrom constructs a new mount namespace from an existing
   106  // filesystem and its root dentry. This is similar to NewMountNamespace, but
   107  // uses an existing filesystem instead of constructing a new one.
   108  func (vfs *VirtualFilesystem) NewMountNamespaceFrom(
   109  	ctx context.Context,
   110  	creds *auth.Credentials,
   111  	fs *Filesystem,
   112  	root *Dentry,
   113  	opts *MountOptions,
   114  	nsfs NamespaceInodeGetter,
   115  ) *MountNamespace {
   116  	mntns := &MountNamespace{
   117  		Owner:       creds.UserNamespace,
   118  		mountpoints: make(map[*Dentry]uint32),
   119  	}
   120  	if nsfs == nil {
   121  		refs := &namespaceDefaultRefs{destroy: mntns.Destroy}
   122  		refs.InitRefs()
   123  		mntns.Refs = refs
   124  	} else {
   125  		mntns.Refs = nsfs.GetNamespaceInode(ctx, mntns)
   126  	}
   127  	mntns.root = newMount(vfs, fs, root, mntns, opts)
   128  	return mntns
   129  }
   130  
// cloneEntry pairs two mounts, named prevMount and parentMount.
//
// NOTE(review): nothing in this part of the file references cloneEntry;
// presumably it is bookkeeping for the mount-tree cloning code -- confirm
// against its users before relying on the field meanings.
type cloneEntry struct {
	prevMount   *Mount
	parentMount *Mount
}
   135  
   136  // +checklocks:vfs.mountMu
   137  func (vfs *VirtualFilesystem) updateRootAndCWD(ctx context.Context, root *VirtualDentry, cwd *VirtualDentry, src *Mount, dst *Mount) {
   138  	if root.mount == src {
   139  		vfs.delayDecRef(root.mount)
   140  		root.mount = dst
   141  		root.mount.IncRef()
   142  	}
   143  	if cwd.mount == src {
   144  		vfs.delayDecRef(cwd.mount)
   145  		cwd.mount = dst
   146  		cwd.mount.IncRef()
   147  	}
   148  }
   149  
// NamespaceInodeGetter is an interface that provides the GetNamespaceInode method.
type NamespaceInodeGetter interface {
	// GetNamespaceInode returns the reference counter to use for ns. The
	// mount namespace constructors in this file store the result in
	// MountNamespace.Refs.
	GetNamespaceInode(ctx context.Context, ns Namespace) refs.TryRefCounter
}
   154  
   155  // CloneMountNamespace makes a copy of the specified mount namespace.
   156  //
   157  // If `root` or `cwd` have mounts in the old namespace, they will be replaced
   158  // with proper mounts from the new namespace.
   159  func (vfs *VirtualFilesystem) CloneMountNamespace(
   160  	ctx context.Context,
   161  	creds *auth.Credentials,
   162  	ns *MountNamespace,
   163  	root *VirtualDentry,
   164  	cwd *VirtualDentry,
   165  	nsfs NamespaceInodeGetter,
   166  ) (*MountNamespace, error) {
   167  	newns := &MountNamespace{
   168  		Owner:       creds.UserNamespace,
   169  		mountpoints: make(map[*Dentry]uint32),
   170  	}
   171  
   172  	newns.Refs = nsfs.GetNamespaceInode(ctx, newns)
   173  	vfs.lockMounts()
   174  	defer vfs.unlockMounts(ctx)
   175  
   176  	cloneType := 0
   177  	if ns.Owner != newns.Owner {
   178  		cloneType = sharedToFollowerClone
   179  	}
   180  	newRoot, err := vfs.cloneMountTree(ctx, ns.root, ns.root.root, cloneType,
   181  		func(ctx context.Context, src, dst *Mount) {
   182  			vfs.updateRootAndCWD(ctx, root, cwd, src, dst) // +checklocksforce: vfs.mountMu is locked.
   183  		})
   184  	if err != nil {
   185  		newns.DecRef(ctx)
   186  		return nil, err
   187  	}
   188  	newns.root = newRoot
   189  	newns.root.ns = newns
   190  	vfs.commitChildren(ctx, newRoot)
   191  	if ns.Owner != newns.Owner {
   192  		vfs.lockMountTree(newRoot)
   193  	}
   194  	return newns, nil
   195  }
   196  
   197  // Destroy implements nsfs.Namespace.Destroy.
   198  func (mntns *MountNamespace) Destroy(ctx context.Context) {
   199  	vfs := mntns.root.fs.VirtualFilesystem()
   200  	vfs.lockMounts()
   201  	vfs.umountTreeLocked(mntns.root, &umountRecursiveOptions{
   202  		disconnectHierarchy: true,
   203  	})
   204  	vfs.unlockMounts(ctx)
   205  }
   206  
// Type implements Namespace.Type. It always returns "mnt", the type string
// for mount namespaces.
func (mntns *MountNamespace) Type() string {
	return "mnt"
}
   211  
// IncRef increments mntns' refcount by delegating to mntns.Refs.
func (mntns *MountNamespace) IncRef() {
	mntns.Refs.IncRef()
}
   216  
// DecRef decrements mntns' reference count by delegating to mntns.Refs,
// passing ctx through.
func (mntns *MountNamespace) DecRef(ctx context.Context) {
	mntns.Refs.DecRef(ctx)
}
   221  
// TryIncRef attempts to increment mntns' reference count by delegating to
// mntns.Refs, and returns that counter's result.
func (mntns *MountNamespace) TryIncRef() bool {
	return mntns.Refs.TryIncRef()
}
   226  
   227  // Root returns mntns' root. If the root is over-mounted, it returns the top
   228  // mount.
   229  func (mntns *MountNamespace) Root(ctx context.Context) VirtualDentry {
   230  	vfs := mntns.root.fs.VirtualFilesystem()
   231  	vd := VirtualDentry{
   232  		mount:  mntns.root,
   233  		dentry: mntns.root.root,
   234  	}
   235  	vd.IncRef()
   236  	if !vd.dentry.isMounted() {
   237  		return vd
   238  	}
   239  	m := vfs.getMountAt(ctx, vd.mount, vd.dentry)
   240  	if m == nil {
   241  		return vd
   242  	}
   243  	vd.DecRef(ctx)
   244  	vd.mount = m
   245  	vd.dentry = m.root
   246  	vd.dentry.IncRef()
   247  	return vd
   248  }
   249  
   250  func (mntns *MountNamespace) checkMountCount(ctx context.Context, mnt *Mount) error {
   251  	if mntns.mounts > MountMax {
   252  		return linuxerr.ENOSPC
   253  	}
   254  	if mntns.mounts+mntns.pending > MountMax {
   255  		return linuxerr.ENOSPC
   256  	}
   257  	mnts := mnt.countSubmountsLocked()
   258  	if mntns.mounts+mntns.pending+mnts > MountMax {
   259  		return linuxerr.ENOSPC
   260  	}
   261  	mntns.pending += mnts
   262  	return nil
   263  }