github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/ext/directory.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ext
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/context"
    20  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    21  	"github.com/SagerNet/gvisor/pkg/log"
    22  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/ext/disklayout"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/vfs"
    25  	"github.com/SagerNet/gvisor/pkg/sync"
    26  )
    27  
    28  // directory represents a directory inode. It holds the childList in memory.
    29  //
    30  // +stateify savable
    31  type directory struct {
    32  	inode inode
    33  
    34  	// childCache maps filenames to dentries for children for which dentries
    35  	// have been instantiated. childCache is protected by filesystem.mu.
    36  	childCache map[string]*dentry
    37  
    38  	// mu serializes the changes to childList.
    39  	// Lock Order (outermost locks must be taken first):
    40  	//   directory.mu
    41  	//     filesystem.mu
    42  	mu sync.Mutex `state:"nosave"`
    43  
    44  	// childList is a list containing (1) child dirents and (2) fake dirents
    45  	// (with diskDirent == nil) that represent the iteration position of
    46  	// directoryFDs. childList is used to support directoryFD.IterDirents()
    47  	// efficiently. childList is protected by mu.
    48  	childList direntList
    49  
    50  	// childMap maps the child's filename to the dirent structure stored in
    51  	// childList. This adds some data replication but helps in faster path
    52  	// traversal. For consistency, key == childMap[key].diskDirent.FileName().
    53  	// Immutable.
    54  	childMap map[string]*dirent
    55  }
    56  
    57  // newDirectory is the directory constructor.
    58  func newDirectory(args inodeArgs, newDirent bool) (*directory, error) {
    59  	file := &directory{
    60  		childCache: make(map[string]*dentry),
    61  		childMap:   make(map[string]*dirent),
    62  	}
    63  	file.inode.init(args, file)
    64  
    65  	// Initialize childList by reading dirents from the underlying file.
    66  	if args.diskInode.Flags().Index {
    67  		// TODO(b/134676337): Support hash tree directories. Currently only the '.'
    68  		// and '..' entries are read in.
    69  
    70  		// Users cannot navigate this hash tree directory yet.
    71  		log.Warningf("hash tree directory being used which is unsupported")
    72  		return file, nil
    73  	}
    74  
    75  	// The dirents are organized in a linear array in the file data.
    76  	// Extract the file data and decode the dirents.
    77  	regFile, err := newRegularFile(args)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  
    82  	// buf is used as scratch space for reading in dirents from disk and
    83  	// unmarshalling them into dirent structs.
    84  	buf := make([]byte, disklayout.DirentSize)
    85  	size := args.diskInode.Size()
    86  	for off, inc := uint64(0), uint64(0); off < size; off += inc {
    87  		toRead := size - off
    88  		if toRead > disklayout.DirentSize {
    89  			toRead = disklayout.DirentSize
    90  		}
    91  		if n, err := regFile.impl.ReadAt(buf[:toRead], int64(off)); uint64(n) < toRead {
    92  			return nil, err
    93  		}
    94  
    95  		var curDirent dirent
    96  		if newDirent {
    97  			curDirent.diskDirent = &disklayout.DirentNew{}
    98  		} else {
    99  			curDirent.diskDirent = &disklayout.DirentOld{}
   100  		}
   101  		curDirent.diskDirent.UnmarshalBytes(buf)
   102  
   103  		if curDirent.diskDirent.Inode() != 0 && len(curDirent.diskDirent.FileName()) != 0 {
   104  			// Inode number and name length fields being set to 0 is used to indicate
   105  			// an unused dirent.
   106  			file.childList.PushBack(&curDirent)
   107  			file.childMap[curDirent.diskDirent.FileName()] = &curDirent
   108  		}
   109  
   110  		// The next dirent is placed exactly after this dirent record on disk.
   111  		inc = uint64(curDirent.diskDirent.RecordSize())
   112  	}
   113  
   114  	return file, nil
   115  }
   116  
   117  func (i *inode) isDir() bool {
   118  	_, ok := i.impl.(*directory)
   119  	return ok
   120  }
   121  
   122  // dirent is the directory.childList node.
   123  //
   124  // +stateify savable
   125  type dirent struct {
   126  	diskDirent disklayout.Dirent
   127  
   128  	// direntEntry links dirents into their parent directory.childList.
   129  	direntEntry
   130  }
   131  
   132  // directoryFD represents a directory file description. It implements
   133  // vfs.FileDescriptionImpl.
   134  //
   135  // +stateify savable
   136  type directoryFD struct {
   137  	fileDescription
   138  	vfs.DirectoryFileDescriptionDefaultImpl
   139  
   140  	// Protected by directory.mu.
   141  	iter *dirent
   142  	off  int64
   143  }
   144  
   145  // Compiles only if directoryFD implements vfs.FileDescriptionImpl.
   146  var _ vfs.FileDescriptionImpl = (*directoryFD)(nil)
   147  
   148  // Release implements vfs.FileDescriptionImpl.Release.
   149  func (fd *directoryFD) Release(ctx context.Context) {
   150  	if fd.iter == nil {
   151  		return
   152  	}
   153  
   154  	dir := fd.inode().impl.(*directory)
   155  	dir.mu.Lock()
   156  	dir.childList.Remove(fd.iter)
   157  	dir.mu.Unlock()
   158  	fd.iter = nil
   159  }
   160  
   161  // IterDirents implements vfs.FileDescriptionImpl.IterDirents.
   162  func (fd *directoryFD) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback) error {
   163  	extfs := fd.filesystem()
   164  	dir := fd.inode().impl.(*directory)
   165  
   166  	dir.mu.Lock()
   167  	defer dir.mu.Unlock()
   168  
   169  	// Ensure that fd.iter exists and is not linked into dir.childList.
   170  	var child *dirent
   171  	if fd.iter == nil {
   172  		// Start iteration at the beginning of dir.
   173  		child = dir.childList.Front()
   174  		fd.iter = &dirent{}
   175  	} else {
   176  		// Continue iteration from where we left off.
   177  		child = fd.iter.Next()
   178  		dir.childList.Remove(fd.iter)
   179  	}
   180  	for ; child != nil; child = child.Next() {
   181  		// Skip other directoryFD iterators.
   182  		if child.diskDirent != nil {
   183  			childType, ok := child.diskDirent.FileType()
   184  			if !ok {
   185  				// We will need to read the inode off disk. Do not increment
   186  				// ref count here because this inode is not being added to the
   187  				// dentry tree.
   188  				extfs.mu.Lock()
   189  				childInode, err := extfs.getOrCreateInodeLocked(child.diskDirent.Inode())
   190  				extfs.mu.Unlock()
   191  				if err != nil {
   192  					// Usage of the file description after the error is
   193  					// undefined. This implementation would continue reading
   194  					// from the next dirent.
   195  					fd.off++
   196  					dir.childList.InsertAfter(child, fd.iter)
   197  					return err
   198  				}
   199  				childType = fs.ToInodeType(childInode.diskInode.Mode().FileType())
   200  			}
   201  
   202  			if err := cb.Handle(vfs.Dirent{
   203  				Name:    child.diskDirent.FileName(),
   204  				Type:    fs.ToDirentType(childType),
   205  				Ino:     uint64(child.diskDirent.Inode()),
   206  				NextOff: fd.off + 1,
   207  			}); err != nil {
   208  				dir.childList.InsertBefore(child, fd.iter)
   209  				return err
   210  			}
   211  			fd.off++
   212  		}
   213  	}
   214  	dir.childList.PushBack(fd.iter)
   215  	return nil
   216  }
   217  
   218  // Seek implements vfs.FileDescriptionImpl.Seek.
   219  func (fd *directoryFD) Seek(ctx context.Context, offset int64, whence int32) (int64, error) {
   220  	if whence != linux.SEEK_SET && whence != linux.SEEK_CUR {
   221  		return 0, linuxerr.EINVAL
   222  	}
   223  
   224  	dir := fd.inode().impl.(*directory)
   225  
   226  	dir.mu.Lock()
   227  	defer dir.mu.Unlock()
   228  
   229  	// Find resulting offset.
   230  	if whence == linux.SEEK_CUR {
   231  		offset += fd.off
   232  	}
   233  
   234  	if offset < 0 {
   235  		// lseek(2) specifies that EINVAL should be returned if the resulting offset
   236  		// is negative.
   237  		return 0, linuxerr.EINVAL
   238  	}
   239  
   240  	n := int64(len(dir.childMap))
   241  	realWantOff := offset
   242  	if realWantOff > n {
   243  		realWantOff = n
   244  	}
   245  	realCurOff := fd.off
   246  	if realCurOff > n {
   247  		realCurOff = n
   248  	}
   249  
   250  	// Ensure that fd.iter exists and is linked into dir.childList so we can
   251  	// intelligently seek from the optimal position.
   252  	if fd.iter == nil {
   253  		fd.iter = &dirent{}
   254  		dir.childList.PushFront(fd.iter)
   255  	}
   256  
   257  	// Guess that iterating from the current position is optimal.
   258  	child := fd.iter
   259  	diff := realWantOff - realCurOff // Shows direction and magnitude of travel.
   260  
   261  	// See if starting from the beginning or end is better.
   262  	abDiff := diff
   263  	if diff < 0 {
   264  		abDiff = -diff
   265  	}
   266  	if abDiff > realWantOff {
   267  		// Starting from the beginning is best.
   268  		child = dir.childList.Front()
   269  		diff = realWantOff
   270  	} else if abDiff > (n - realWantOff) {
   271  		// Starting from the end is best.
   272  		child = dir.childList.Back()
   273  		// (n - 1) because the last non-nil dirent represents the (n-1)th offset.
   274  		diff = realWantOff - (n - 1)
   275  	}
   276  
   277  	for child != nil {
   278  		// Skip other directoryFD iterators.
   279  		if child.diskDirent != nil {
   280  			if diff == 0 {
   281  				if child != fd.iter {
   282  					dir.childList.Remove(fd.iter)
   283  					dir.childList.InsertBefore(child, fd.iter)
   284  				}
   285  
   286  				fd.off = offset
   287  				return offset, nil
   288  			}
   289  
   290  			if diff < 0 {
   291  				diff++
   292  				child = child.Prev()
   293  			} else {
   294  				diff--
   295  				child = child.Next()
   296  			}
   297  			continue
   298  		}
   299  
   300  		if diff < 0 {
   301  			child = child.Prev()
   302  		} else {
   303  			child = child.Next()
   304  		}
   305  	}
   306  
   307  	// Reaching here indicates that the offset is beyond the end of the childList.
   308  	dir.childList.Remove(fd.iter)
   309  	dir.childList.PushBack(fd.iter)
   310  	fd.off = offset
   311  	return offset, nil
   312  }