github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/gofer/path.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/context"
    21  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    22  	"github.com/SagerNet/gvisor/pkg/log"
    23  	"github.com/SagerNet/gvisor/pkg/p9"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/device"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    28  	"github.com/SagerNet/gvisor/pkg/syserror"
    29  )
    30  
    31  // maxFilenameLen is the maximum length of a filename. This is dictated by 9P's
    32  // encoding of strings, which uses 2 bytes for the length prefix.
    33  const maxFilenameLen = (1 << 16) - 1
    34  
    35  func changeType(mode p9.FileMode, newType p9.FileMode) p9.FileMode {
    36  	if newType&^p9.FileModeMask != 0 {
    37  		panic(fmt.Sprintf("newType contained more bits than just file mode: %x", newType))
    38  	}
    39  	clear := mode &^ p9.FileModeMask
    40  	return clear | newType
    41  }
    42  
    43  // Lookup loads an Inode at name into a Dirent based on the session's cache
    44  // policy.
    45  func (i *inodeOperations) Lookup(ctx context.Context, dir *fs.Inode, name string) (*fs.Dirent, error) {
    46  	if len(name) > maxFilenameLen {
    47  		return nil, linuxerr.ENAMETOOLONG
    48  	}
    49  
    50  	s := i.session()
    51  	cp := s.cachePolicy
    52  	if cp.cacheReaddir() {
    53  		// Check to see if we have readdirCache that indicates the
    54  		// child does not exist.  Avoid holding readdirMu longer than
    55  		// we need to.
    56  		i.readdirMu.Lock()
    57  		if i.readdirCache != nil && !i.readdirCache.Contains(name) {
    58  			// No such child.
    59  			i.readdirMu.Unlock()
    60  			if cp.cacheNegativeDirents() {
    61  				return fs.NewNegativeDirent(name), nil
    62  			}
    63  			return nil, syserror.ENOENT
    64  		}
    65  		i.readdirMu.Unlock()
    66  	}
    67  
    68  	// Get a p9.File for name.
    69  	qids, newFile, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name})
    70  	if err != nil {
    71  		if linuxerr.Equals(linuxerr.ENOENT, err) {
    72  			if cp.cacheNegativeDirents() {
    73  				// Return a negative Dirent. It will stay cached until something
    74  				// is created over it.
    75  				return fs.NewNegativeDirent(name), nil
    76  			}
    77  			return nil, syserror.ENOENT
    78  		}
    79  		return nil, err
    80  	}
    81  
    82  	if s.overrides != nil {
    83  		// Check if file belongs to a internal named pipe. Note that it doesn't need
    84  		// to check for sockets because it's done in newInodeOperations below.
    85  		deviceKey := device.MultiDeviceKey{
    86  			Device:          p9attr.RDev,
    87  			SecondaryDevice: i.session().connID,
    88  			Inode:           qids[0].Path,
    89  		}
    90  		s.overrides.lock()
    91  		if pipeInode := s.overrides.getPipe(deviceKey); pipeInode != nil {
    92  			s.overrides.unlock()
    93  			pipeInode.IncRef()
    94  			return fs.NewDirent(ctx, pipeInode, name), nil
    95  		}
    96  		s.overrides.unlock()
    97  	}
    98  
    99  	// Construct the Inode operations.
   100  	sattr, node := newInodeOperations(ctx, i.fileState.s, newFile, qids[0], mask, p9attr)
   101  
   102  	// Construct a positive Dirent.
   103  	return fs.NewDirent(ctx, fs.NewInode(ctx, node, dir.MountSource, sattr), name), nil
   104  }
   105  
   106  // Creates a new Inode at name and returns its File based on the session's cache policy.
   107  //
   108  // Ownership is currently ignored.
   109  func (i *inodeOperations) Create(ctx context.Context, dir *fs.Inode, name string, flags fs.FileFlags, perm fs.FilePermissions) (*fs.File, error) {
   110  	if len(name) > maxFilenameLen {
   111  		return nil, linuxerr.ENAMETOOLONG
   112  	}
   113  
   114  	// Create replaces the directory fid with the newly created/opened
   115  	// file, so clone this directory so it doesn't change out from under
   116  	// this node.
   117  	_, newFile, err := i.fileState.file.walk(ctx, nil)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	// Map the FileFlags to p9 OpenFlags.
   123  	var openFlags p9.OpenFlags
   124  	switch {
   125  	case flags.Read && flags.Write:
   126  		openFlags = p9.ReadWrite
   127  	case flags.Read:
   128  		openFlags = p9.ReadOnly
   129  	case flags.Write:
   130  		openFlags = p9.WriteOnly
   131  	default:
   132  		panic(fmt.Sprintf("Create called with unknown or unset open flags: %v", flags))
   133  	}
   134  
   135  	// If the parent directory has setgid enabled, change the new file's owner.
   136  	owner := fs.FileOwnerFromContext(ctx)
   137  	parentUattr, err := dir.UnstableAttr(ctx)
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  	if parentUattr.Perms.SetGID {
   142  		owner.GID = parentUattr.Owner.GID
   143  	}
   144  
   145  	hostFile, err := newFile.create(ctx, name, openFlags, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
   146  	if err != nil {
   147  		// Could not create the file.
   148  		newFile.close(ctx)
   149  		return nil, err
   150  	}
   151  
   152  	i.touchModificationAndStatusChangeTime(ctx, dir)
   153  
   154  	// Get an unopened p9.File for the file we created so that it can be cloned
   155  	// and re-opened multiple times after creation, while also getting its
   156  	// attributes. Both are required for inodeOperations.
   157  	qids, unopened, mask, p9attr, err := i.fileState.file.walkGetAttr(ctx, []string{name})
   158  	if err != nil {
   159  		newFile.close(ctx)
   160  		if hostFile != nil {
   161  			hostFile.Close()
   162  		}
   163  		return nil, err
   164  	}
   165  	if len(qids) != 1 {
   166  		log.Warningf("WalkGetAttr(%s) succeeded, but returned %d QIDs (%v), wanted 1", name, len(qids), qids)
   167  		newFile.close(ctx)
   168  		if hostFile != nil {
   169  			hostFile.Close()
   170  		}
   171  		unopened.close(ctx)
   172  		return nil, syserror.EIO
   173  	}
   174  	qid := qids[0]
   175  
   176  	// Construct the InodeOperations.
   177  	sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, p9attr)
   178  
   179  	// Construct the positive Dirent.
   180  	d := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
   181  	defer d.DecRef(ctx)
   182  
   183  	// Construct the new file, caching the handles if allowed.
   184  	h := handles{
   185  		File: newFile,
   186  		Host: hostFile,
   187  	}
   188  	h.EnableLeakCheck("gofer.handles")
   189  	if iops.fileState.canShareHandles() {
   190  		iops.fileState.handlesMu.Lock()
   191  		iops.fileState.setSharedHandlesLocked(flags, &h)
   192  		iops.fileState.handlesMu.Unlock()
   193  	}
   194  	return NewFile(ctx, d, name, flags, iops, &h), nil
   195  }
   196  
   197  // CreateLink uses Create to create a symlink between oldname and newname.
   198  func (i *inodeOperations) CreateLink(ctx context.Context, dir *fs.Inode, oldname string, newname string) error {
   199  	if len(newname) > maxFilenameLen {
   200  		return linuxerr.ENAMETOOLONG
   201  	}
   202  
   203  	owner := fs.FileOwnerFromContext(ctx)
   204  	if _, err := i.fileState.file.symlink(ctx, oldname, newname, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
   205  		return err
   206  	}
   207  	i.touchModificationAndStatusChangeTime(ctx, dir)
   208  	return nil
   209  }
   210  
   211  // CreateHardLink implements InodeOperations.CreateHardLink.
   212  func (i *inodeOperations) CreateHardLink(ctx context.Context, inode *fs.Inode, target *fs.Inode, newName string) error {
   213  	if len(newName) > maxFilenameLen {
   214  		return linuxerr.ENAMETOOLONG
   215  	}
   216  
   217  	targetOpts, ok := target.InodeOperations.(*inodeOperations)
   218  	if !ok {
   219  		return linuxerr.EXDEV
   220  	}
   221  
   222  	if err := i.fileState.file.link(ctx, &targetOpts.fileState.file, newName); err != nil {
   223  		return err
   224  	}
   225  
   226  	s := i.session()
   227  	if s.cachePolicy.cacheUAttrs(inode) {
   228  		// Increase link count.
   229  		targetOpts.cachingInodeOps.IncLinks(ctx)
   230  	}
   231  
   232  	i.touchModificationAndStatusChangeTime(ctx, inode)
   233  	return nil
   234  }
   235  
   236  // CreateDirectory uses Create to create a directory named s under inodeOperations.
   237  func (i *inodeOperations) CreateDirectory(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
   238  	if len(name) > maxFilenameLen {
   239  		return linuxerr.ENAMETOOLONG
   240  	}
   241  
   242  	// If the parent directory has setgid enabled, change the new directory's
   243  	// owner and enable setgid.
   244  	owner := fs.FileOwnerFromContext(ctx)
   245  	parentUattr, err := dir.UnstableAttr(ctx)
   246  	if err != nil {
   247  		return err
   248  	}
   249  	if parentUattr.Perms.SetGID {
   250  		owner.GID = parentUattr.Owner.GID
   251  		perm.SetGID = true
   252  	}
   253  
   254  	if _, err := i.fileState.file.mkdir(ctx, name, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
   255  		return err
   256  	}
   257  
   258  	s := i.session()
   259  	if s.cachePolicy.cacheUAttrs(dir) {
   260  		// Increase link count.
   261  		//
   262  		// N.B. This will update the modification time.
   263  		i.cachingInodeOps.IncLinks(ctx)
   264  	}
   265  	if s.cachePolicy.cacheReaddir() {
   266  		// Invalidate readdir cache.
   267  		i.markDirectoryDirty()
   268  	}
   269  	return nil
   270  }
   271  
   272  // Bind implements InodeOperations.Bind.
   273  func (i *inodeOperations) Bind(ctx context.Context, dir *fs.Inode, name string, ep transport.BoundEndpoint, perm fs.FilePermissions) (*fs.Dirent, error) {
   274  	if len(name) > maxFilenameLen {
   275  		return nil, linuxerr.ENAMETOOLONG
   276  	}
   277  
   278  	s := i.session()
   279  	if s.overrides == nil {
   280  		return nil, syserror.EOPNOTSUPP
   281  	}
   282  
   283  	// Stabilize the override map while creation is in progress.
   284  	s.overrides.lock()
   285  	defer s.overrides.unlock()
   286  
   287  	sattr, iops, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeSocket)
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  
   292  	// Construct the positive Dirent.
   293  	childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
   294  	s.overrides.addBoundEndpoint(iops.fileState.key, childDir, ep)
   295  	return childDir, nil
   296  }
   297  
   298  // CreateFifo implements fs.InodeOperations.CreateFifo.
   299  func (i *inodeOperations) CreateFifo(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions) error {
   300  	if len(name) > maxFilenameLen {
   301  		return linuxerr.ENAMETOOLONG
   302  	}
   303  
   304  	owner := fs.FileOwnerFromContext(ctx)
   305  	mode := p9.FileMode(perm.LinuxMode()) | p9.ModeNamedPipe
   306  
   307  	// N.B. FIFOs use major/minor numbers 0.
   308  	s := i.session()
   309  	if _, err := i.fileState.file.mknod(ctx, name, mode, 0, 0, p9.UID(owner.UID), p9.GID(owner.GID)); err != nil {
   310  		if s.overrides == nil || !linuxerr.Equals(linuxerr.EPERM, err) {
   311  			return err
   312  		}
   313  		// If gofer doesn't support mknod, check if we can create an internal fifo.
   314  		return i.createInternalFifo(ctx, dir, name, owner, perm)
   315  	}
   316  
   317  	i.touchModificationAndStatusChangeTime(ctx, dir)
   318  	return nil
   319  }
   320  
   321  func (i *inodeOperations) createInternalFifo(ctx context.Context, dir *fs.Inode, name string, owner fs.FileOwner, perm fs.FilePermissions) error {
   322  	s := i.session()
   323  	if s.overrides == nil {
   324  		return linuxerr.EPERM
   325  	}
   326  
   327  	// Stabilize the override map while creation is in progress.
   328  	s.overrides.lock()
   329  	defer s.overrides.unlock()
   330  
   331  	sattr, fileOps, err := i.createEndpointFile(ctx, dir, name, perm, p9.ModeNamedPipe)
   332  	if err != nil {
   333  		return err
   334  	}
   335  
   336  	// First create a pipe.
   337  	p := pipe.NewPipe(true /* isNamed */, pipe.DefaultPipeSize)
   338  
   339  	// Wrap the fileOps with our Fifo.
   340  	iops := &fifo{
   341  		InodeOperations: pipe.NewInodeOperations(ctx, perm, p),
   342  		fileIops:        fileOps,
   343  	}
   344  	inode := fs.NewInode(ctx, iops, dir.MountSource, sattr)
   345  
   346  	// Construct the positive Dirent.
   347  	childDir := fs.NewDirent(ctx, fs.NewInode(ctx, iops, dir.MountSource, sattr), name)
   348  	s.overrides.addPipe(fileOps.fileState.key, childDir, inode)
   349  	return nil
   350  }
   351  
   352  // Caller must hold Session.endpoint lock.
   353  func (i *inodeOperations) createEndpointFile(ctx context.Context, dir *fs.Inode, name string, perm fs.FilePermissions, fileType p9.FileMode) (fs.StableAttr, *inodeOperations, error) {
   354  	_, dirClone, err := i.fileState.file.walk(ctx, nil)
   355  	if err != nil {
   356  		return fs.StableAttr{}, nil, err
   357  	}
   358  	// We're not going to use dirClone after return.
   359  	defer dirClone.close(ctx)
   360  
   361  	// Create a regular file in the gofer and then mark it as a socket by
   362  	// adding this inode key in the 'overrides' map.
   363  	owner := fs.FileOwnerFromContext(ctx)
   364  	hostFile, err := dirClone.create(ctx, name, p9.ReadWrite, p9.FileMode(perm.LinuxMode()), p9.UID(owner.UID), p9.GID(owner.GID))
   365  	if err != nil {
   366  		return fs.StableAttr{}, nil, err
   367  	}
   368  	// We're not going to use this file.
   369  	hostFile.Close()
   370  
   371  	i.touchModificationAndStatusChangeTime(ctx, dir)
   372  
   373  	// Get the attributes of the file to create inode key.
   374  	qid, mask, attr, err := getattr(ctx, dirClone)
   375  	if err != nil {
   376  		return fs.StableAttr{}, nil, err
   377  	}
   378  
   379  	// Get an unopened p9.File for the file we created so that it can be
   380  	// cloned and re-opened multiple times after creation.
   381  	_, unopened, err := i.fileState.file.walk(ctx, []string{name})
   382  	if err != nil {
   383  		return fs.StableAttr{}, nil, err
   384  	}
   385  
   386  	// Construct new inode with file type overridden.
   387  	attr.Mode = changeType(attr.Mode, fileType)
   388  	sattr, iops := newInodeOperations(ctx, i.fileState.s, unopened, qid, mask, attr)
   389  	return sattr, iops, nil
   390  }
   391  
   392  // Remove implements InodeOperations.Remove.
   393  func (i *inodeOperations) Remove(ctx context.Context, dir *fs.Inode, name string) error {
   394  	if len(name) > maxFilenameLen {
   395  		return linuxerr.ENAMETOOLONG
   396  	}
   397  
   398  	s := i.session()
   399  	var key *device.MultiDeviceKey
   400  	if s.overrides != nil {
   401  		// Find out if file being deleted is a socket or pipe that needs to be
   402  		// removed from endpoint map.
   403  		if d, err := i.Lookup(ctx, dir, name); err == nil {
   404  			defer d.DecRef(ctx)
   405  
   406  			if fs.IsSocket(d.Inode.StableAttr) || fs.IsPipe(d.Inode.StableAttr) {
   407  				switch iops := d.Inode.InodeOperations.(type) {
   408  				case *inodeOperations:
   409  					key = &iops.fileState.key
   410  				case *fifo:
   411  					key = &iops.fileIops.fileState.key
   412  				}
   413  
   414  				// Stabilize the override map while deletion is in progress.
   415  				s.overrides.lock()
   416  				defer s.overrides.unlock()
   417  			}
   418  		}
   419  	}
   420  
   421  	if err := i.fileState.file.unlinkAt(ctx, name, 0); err != nil {
   422  		return err
   423  	}
   424  	if key != nil {
   425  		s.overrides.remove(ctx, *key)
   426  	}
   427  	i.touchModificationAndStatusChangeTime(ctx, dir)
   428  
   429  	return nil
   430  }
   431  
   432  // Remove implements InodeOperations.RemoveDirectory.
   433  func (i *inodeOperations) RemoveDirectory(ctx context.Context, dir *fs.Inode, name string) error {
   434  	if len(name) > maxFilenameLen {
   435  		return linuxerr.ENAMETOOLONG
   436  	}
   437  
   438  	// 0x200 = AT_REMOVEDIR.
   439  	if err := i.fileState.file.unlinkAt(ctx, name, 0x200); err != nil {
   440  		return err
   441  	}
   442  
   443  	s := i.session()
   444  	if s.cachePolicy.cacheUAttrs(dir) {
   445  		// Decrease link count and updates atime.
   446  		i.cachingInodeOps.DecLinks(ctx)
   447  	}
   448  	if s.cachePolicy.cacheReaddir() {
   449  		// Invalidate readdir cache.
   450  		i.markDirectoryDirty()
   451  	}
   452  	return nil
   453  }
   454  
   455  // Rename renames this node.
   456  func (i *inodeOperations) Rename(ctx context.Context, inode *fs.Inode, oldParent *fs.Inode, oldName string, newParent *fs.Inode, newName string, replacement bool) error {
   457  	if len(newName) > maxFilenameLen {
   458  		return linuxerr.ENAMETOOLONG
   459  	}
   460  
   461  	// Don't allow renames across different mounts.
   462  	if newParent.MountSource != oldParent.MountSource {
   463  		return linuxerr.EXDEV
   464  	}
   465  
   466  	// Unwrap the new parent to a *inodeOperations.
   467  	newParentInodeOperations := newParent.InodeOperations.(*inodeOperations)
   468  
   469  	// Unwrap the old parent to a *inodeOperations.
   470  	oldParentInodeOperations := oldParent.InodeOperations.(*inodeOperations)
   471  
   472  	// Do the rename.
   473  	if err := i.fileState.file.rename(ctx, newParentInodeOperations.fileState.file, newName); err != nil {
   474  		return err
   475  	}
   476  
   477  	// Is the renamed entity a directory? Fix link counts.
   478  	s := i.session()
   479  	if fs.IsDir(i.fileState.sattr) {
   480  		// Update cached state.
   481  		if s.cachePolicy.cacheUAttrs(oldParent) {
   482  			oldParentInodeOperations.cachingInodeOps.DecLinks(ctx)
   483  		}
   484  		if s.cachePolicy.cacheUAttrs(newParent) {
   485  			// Only IncLinks if there is a new addition to
   486  			// newParent. If this is replacement, then the total
   487  			// count remains the same.
   488  			if !replacement {
   489  				newParentInodeOperations.cachingInodeOps.IncLinks(ctx)
   490  			}
   491  		}
   492  	}
   493  	if s.cachePolicy.cacheReaddir() {
   494  		// Mark old directory dirty.
   495  		oldParentInodeOperations.markDirectoryDirty()
   496  		if oldParent != newParent {
   497  			// Mark new directory dirty.
   498  			newParentInodeOperations.markDirectoryDirty()
   499  		}
   500  	}
   501  
   502  	// Rename always updates ctime.
   503  	if s.cachePolicy.cacheUAttrs(inode) {
   504  		i.cachingInodeOps.TouchStatusChangeTime(ctx)
   505  	}
   506  	return nil
   507  }
   508  
   509  func (i *inodeOperations) touchModificationAndStatusChangeTime(ctx context.Context, inode *fs.Inode) {
   510  	s := i.session()
   511  	if s.cachePolicy.cacheUAttrs(inode) {
   512  		i.cachingInodeOps.TouchModificationAndStatusChangeTime(ctx)
   513  	}
   514  	if s.cachePolicy.cacheReaddir() {
   515  		// Invalidate readdir cache.
   516  		i.markDirectoryDirty()
   517  	}
   518  }
   519  
   520  // markDirectoryDirty marks any cached data dirty for this directory. This is necessary in order
   521  // to ensure that this node does not retain stale state throughout its lifetime across multiple
   522  // open directory handles.
   523  //
   524  // Currently this means invalidating any readdir caches.
   525  func (i *inodeOperations) markDirectoryDirty() {
   526  	i.readdirMu.Lock()
   527  	defer i.readdirMu.Unlock()
   528  	i.readdirCache = nil
   529  }