github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/gofer/directory.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gofer
    16  
    17  import (
    18  	"fmt"
    19  	"sync/atomic"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    22  	"github.com/SagerNet/gvisor/pkg/context"
    23  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    24  	"github.com/SagerNet/gvisor/pkg/hostarch"
    25  	"github.com/SagerNet/gvisor/pkg/p9"
    26  	"github.com/SagerNet/gvisor/pkg/refsvfs2"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/auth"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/pipe"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/socket/unix/transport"
    30  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    31  	"github.com/SagerNet/gvisor/pkg/sync"
    32  )
    33  
    34  func (d *dentry) isDir() bool {
    35  	return d.fileType() == linux.S_IFDIR
    36  }
    37  
    38  // Preconditions:
    39  // * filesystem.renameMu must be locked.
    40  // * d.dirMu must be locked.
    41  // * d.isDir().
    42  // * child must be a newly-created dentry that has never had a parent.
    43  func (d *dentry) cacheNewChildLocked(child *dentry, name string) {
    44  	d.IncRef() // reference held by child on its parent
    45  	child.parent = d
    46  	child.name = name
    47  	if d.children == nil {
    48  		d.children = make(map[string]*dentry)
    49  	}
    50  	d.children[name] = child
    51  }
    52  
    53  // Preconditions:
    54  // * d.dirMu must be locked.
    55  // * d.isDir().
    56  func (d *dentry) cacheNegativeLookupLocked(name string) {
    57  	// Don't cache negative lookups if InteropModeShared is in effect (since
    58  	// this makes remote lookup unavoidable), or if d.isSynthetic() (in which
    59  	// case the only files in the directory are those for which a dentry exists
    60  	// in d.children). Instead, just delete any previously-cached dentry.
    61  	if d.fs.opts.interop == InteropModeShared || d.isSynthetic() {
    62  		delete(d.children, name)
    63  		return
    64  	}
    65  	if d.children == nil {
    66  		d.children = make(map[string]*dentry)
    67  	}
    68  	d.children[name] = nil
    69  }
    70  
// createSyntheticOpts bundles the parameters consumed by
// dentry.createSyntheticChildLocked when creating a synthetic file.
type createSyntheticOpts struct {
	// name is the name under which the new child is cached in its parent.
	name string
	// mode supplies the new file's mode bits; its file type selects which of
	// endpoint or pipe (if any) is attached to the child.
	mode linux.FileMode
	// kuid and kgid become the new file's uid and gid.
	kuid auth.KUID
	kgid auth.KGID

	// The endpoint for a synthetic socket. endpoint should be nil if the file
	// being created is not a socket.
	endpoint transport.BoundEndpoint

	// pipe should be nil if the file being created is not a pipe.
	pipe *pipe.VFSPipe
}
    84  
    85  // createSyntheticChildLocked creates a synthetic file with the given name
    86  // in d.
    87  //
    88  // Preconditions:
    89  // * d.dirMu must be locked.
    90  // * d.isDir().
    91  // * d does not already contain a child with the given name.
    92  func (d *dentry) createSyntheticChildLocked(opts *createSyntheticOpts) {
    93  	now := d.fs.clock.Now().Nanoseconds()
    94  	child := &dentry{
    95  		refs:      1, // held by d
    96  		fs:        d.fs,
    97  		ino:       d.fs.nextIno(),
    98  		mode:      uint32(opts.mode),
    99  		uid:       uint32(opts.kuid),
   100  		gid:       uint32(opts.kgid),
   101  		blockSize: hostarch.PageSize, // arbitrary
   102  		atime:     now,
   103  		mtime:     now,
   104  		ctime:     now,
   105  		btime:     now,
   106  		readFD:    -1,
   107  		writeFD:   -1,
   108  		mmapFD:    -1,
   109  		nlink:     uint32(2),
   110  	}
   111  	refsvfs2.Register(child)
   112  	switch opts.mode.FileType() {
   113  	case linux.S_IFDIR:
   114  		// Nothing else needs to be done.
   115  	case linux.S_IFSOCK:
   116  		child.endpoint = opts.endpoint
   117  	case linux.S_IFIFO:
   118  		child.pipe = opts.pipe
   119  	default:
   120  		panic(fmt.Sprintf("failed to create synthetic file of unrecognized type: %v", opts.mode.FileType()))
   121  	}
   122  	child.pf.dentry = child
   123  	child.vfsd.Init(child)
   124  
   125  	d.cacheNewChildLocked(child, opts.name)
   126  	d.syntheticChildren++
   127  }
   128  
// directoryFD is the file description implementation whose IterDirents and
// Seek methods iterate over the entries of a directory dentry.
//
// +stateify savable
type directoryFD struct {
	fileDescription
	vfs.DirectoryFileDescriptionDefaultImpl

	// mu is held for the duration of IterDirents and Seek and guards off and
	// dirents.
	mu sync.Mutex `state:"nosave"`
	// off is the index into dirents of the next entry to hand to the
	// IterDirents callback.
	off int64
	// dirents is the snapshot of directory entries obtained from
	// dentry.getDirents. It is nil until the first IterDirents call and is
	// reset to nil by a SEEK_SET to offset 0.
	dirents []vfs.Dirent
}
   138  
   139  // Release implements vfs.FileDescriptionImpl.Release.
   140  func (fd *directoryFD) Release(context.Context) {
   141  }
   142  
   143  // IterDirents implements vfs.FileDescriptionImpl.IterDirents.
   144  func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
   145  	fd.mu.Lock()
   146  	defer fd.mu.Unlock()
   147  
   148  	d := fd.dentry()
   149  	if fd.dirents == nil {
   150  		ds, err := d.getDirents(ctx)
   151  		if err != nil {
   152  			return err
   153  		}
   154  		fd.dirents = ds
   155  	}
   156  
   157  	d.InotifyWithParent(ctx, linux.IN_ACCESS, 0, vfs.PathEvent)
   158  	if d.cachedMetadataAuthoritative() {
   159  		d.touchAtime(fd.vfsfd.Mount())
   160  	}
   161  
   162  	for fd.off < int64(len(fd.dirents)) {
   163  		if err := cb.Handle(fd.dirents[fd.off]); err != nil {
   164  			return err
   165  		}
   166  		fd.off++
   167  	}
   168  	return nil
   169  }
   170  
   171  // Preconditions:
   172  // * d.isDir().
   173  // * There exists at least one directoryFD representing d.
   174  func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
   175  	// NOTE(b/135560623): 9P2000.L's readdir does not specify behavior in the
   176  	// presence of concurrent mutation of an iterated directory, so
   177  	// implementations may duplicate or omit entries in this case, which
   178  	// violates POSIX semantics. Thus we read all directory entries while
   179  	// holding d.dirMu to exclude directory mutations. (Note that it is
   180  	// impossible for the client to exclude concurrent mutation from other
   181  	// remote filesystem users. Since there is no way to detect if the server
   182  	// has incorrectly omitted directory entries, we simply assume that the
   183  	// server is well-behaved under InteropModeShared.) This is inconsistent
   184  	// with Linux (which appears to assume that directory fids have the correct
   185  	// semantics, and translates struct file_operations::readdir calls directly
   186  	// to readdir RPCs), but is consistent with VFS1.
   187  
   188  	// filesystem.renameMu is needed for d.parent, and must be locked before
   189  	// dentry.dirMu.
   190  	d.fs.renameMu.RLock()
   191  	defer d.fs.renameMu.RUnlock()
   192  	d.dirMu.Lock()
   193  	defer d.dirMu.Unlock()
   194  	if d.dirents != nil {
   195  		return d.dirents, nil
   196  	}
   197  
   198  	// It's not clear if 9P2000.L's readdir is expected to return "." and "..",
   199  	// so we generate them here.
   200  	parent := genericParentOrSelf(d)
   201  	dirents := []vfs.Dirent{
   202  		{
   203  			Name:    ".",
   204  			Type:    linux.DT_DIR,
   205  			Ino:     uint64(d.ino),
   206  			NextOff: 1,
   207  		},
   208  		{
   209  			Name:    "..",
   210  			Type:    uint8(atomic.LoadUint32(&parent.mode) >> 12),
   211  			Ino:     uint64(parent.ino),
   212  			NextOff: 2,
   213  		},
   214  	}
   215  	var realChildren map[string]struct{}
   216  	if !d.isSynthetic() {
   217  		if d.syntheticChildren != 0 && d.fs.opts.interop == InteropModeShared {
   218  			// Record the set of children d actually has so that we don't emit
   219  			// duplicate entries for synthetic children.
   220  			realChildren = make(map[string]struct{})
   221  		}
   222  		off := uint64(0)
   223  		const count = 64 * 1024 // for consistency with the vfs1 client
   224  		d.handleMu.RLock()
   225  		if d.readFile.isNil() {
   226  			// This should not be possible because a readable handle should
   227  			// have been opened when the calling directoryFD was opened.
   228  			d.handleMu.RUnlock()
   229  			panic("gofer.dentry.getDirents called without a readable handle")
   230  		}
   231  		for {
   232  			p9ds, err := d.readFile.readdir(ctx, off, count)
   233  			if err != nil {
   234  				d.handleMu.RUnlock()
   235  				return nil, err
   236  			}
   237  			if len(p9ds) == 0 {
   238  				d.handleMu.RUnlock()
   239  				break
   240  			}
   241  			for _, p9d := range p9ds {
   242  				if p9d.Name == "." || p9d.Name == ".." {
   243  					continue
   244  				}
   245  				dirent := vfs.Dirent{
   246  					Name:    p9d.Name,
   247  					Ino:     d.fs.inoFromQIDPath(p9d.QID.Path),
   248  					NextOff: int64(len(dirents) + 1),
   249  				}
   250  				// p9 does not expose 9P2000.U's DMDEVICE, DMNAMEDPIPE, or
   251  				// DMSOCKET.
   252  				switch p9d.Type {
   253  				case p9.TypeSymlink:
   254  					dirent.Type = linux.DT_LNK
   255  				case p9.TypeDir:
   256  					dirent.Type = linux.DT_DIR
   257  				default:
   258  					dirent.Type = linux.DT_REG
   259  				}
   260  				dirents = append(dirents, dirent)
   261  				if realChildren != nil {
   262  					realChildren[p9d.Name] = struct{}{}
   263  				}
   264  			}
   265  			off = p9ds[len(p9ds)-1].Offset
   266  		}
   267  	}
   268  	// Emit entries for synthetic children.
   269  	if d.syntheticChildren != 0 {
   270  		for _, child := range d.children {
   271  			if child == nil || !child.isSynthetic() {
   272  				continue
   273  			}
   274  			if _, ok := realChildren[child.name]; ok {
   275  				continue
   276  			}
   277  			dirents = append(dirents, vfs.Dirent{
   278  				Name:    child.name,
   279  				Type:    uint8(atomic.LoadUint32(&child.mode) >> 12),
   280  				Ino:     uint64(child.ino),
   281  				NextOff: int64(len(dirents) + 1),
   282  			})
   283  		}
   284  	}
   285  	// Cache dirents for future directoryFDs if permitted.
   286  	if d.cachedMetadataAuthoritative() {
   287  		d.dirents = dirents
   288  	}
   289  	return dirents, nil
   290  }
   291  
   292  // Seek implements vfs.FileDescriptionImpl.Seek.
   293  func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   294  	fd.mu.Lock()
   295  	defer fd.mu.Unlock()
   296  
   297  	switch whence {
   298  	case linux.SEEK_SET:
   299  		if offset < 0 {
   300  			return 0, linuxerr.EINVAL
   301  		}
   302  		if offset == 0 {
   303  			// Ensure that the next call to fd.IterDirents() calls
   304  			// fd.dentry().getDirents().
   305  			fd.dirents = nil
   306  		}
   307  		fd.off = offset
   308  		return fd.off, nil
   309  	case linux.SEEK_CUR:
   310  		offset += fd.off
   311  		if offset < 0 {
   312  			return 0, linuxerr.EINVAL
   313  		}
   314  		// Don't clear fd.dirents in this case, even if offset == 0.
   315  		fd.off = offset
   316  		return fd.off, nil
   317  	default:
   318  		return 0, linuxerr.EINVAL
   319  	}
   320  }
   321  
   322  // Sync implements vfs.FileDescriptionImpl.Sync.
   323  func (fd *directoryFD) Sync(ctx context.Context) error {
   324  	return fd.dentry().syncRemoteFile(ctx)
   325  }