github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fsimpl/ext/extent_file.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ext
    16  
    17  import (
    18  	"io"
    19  	"sort"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    22  	"github.com/SagerNet/gvisor/pkg/sentry/fsimpl/ext/disklayout"
    23  	"github.com/SagerNet/gvisor/pkg/syserror"
    24  )
    25  
    26  // extentFile is a regular file (regFile) whose data is located via an extent tree.
    27  //
    28  // +stateify savable
    29  type extentFile struct {
    30  	regFile regularFile
    31  
    32  	// root is the root node of the extent tree; buildExtTree decodes it from
    33  	// the 60 byte diskInode.Data(). Immutable.
    34  	root disklayout.ExtentNode
    35  }
    36  
    37  // Compile-time assertion that *extentFile implements io.ReaderAt.
    38  var _ io.ReaderAt = (*extentFile)(nil)
    39  
    40  // newExtentFile is the extent file constructor. It reads the entire extent
    41  // tree into memory.
    42  // TODO(b/134676337): Build extent tree on demand to reduce memory usage.
    43  func newExtentFile(args inodeArgs) (*extentFile, error) {
    44  	file := &extentFile{}
    45  	file.regFile.impl = file
    46  	file.regFile.inode.init(args, &file.regFile)
    47  	err := file.buildExtTree()
    48  	if err != nil {
    49  		return nil, err
    50  	}
    51  	return file, nil
    52  }
    53  
    54  // buildExtTree builds the extent tree by reading it from disk by doing
    55  // running a simple DFS. It first reads the root node from the inode struct in
    56  // memory. Then it recursively builds the rest of the tree by reading it off
    57  // disk.
    58  //
    59  // Precondition: inode flag InExtents must be set.
    60  func (f *extentFile) buildExtTree() error {
    61  	rootNodeData := f.regFile.inode.diskInode.Data()
    62  
    63  	f.root.Header.UnmarshalBytes(rootNodeData[:disklayout.ExtentHeaderSize])
    64  
    65  	// Root node can not have more than 4 entries: 60 bytes = 1 header + 4 entries.
    66  	if f.root.Header.NumEntries > 4 {
    67  		// read(2) specifies that EINVAL should be returned if the file is unsuitable
    68  		// for reading.
    69  		return linuxerr.EINVAL
    70  	}
    71  
    72  	f.root.Entries = make([]disklayout.ExtentEntryPair, f.root.Header.NumEntries)
    73  	for i, off := uint16(0), disklayout.ExtentEntrySize; i < f.root.Header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize {
    74  		var curEntry disklayout.ExtentEntry
    75  		if f.root.Header.Height == 0 {
    76  			// Leaf node.
    77  			curEntry = &disklayout.Extent{}
    78  		} else {
    79  			// Internal node.
    80  			curEntry = &disklayout.ExtentIdx{}
    81  		}
    82  		curEntry.UnmarshalBytes(rootNodeData[off : off+disklayout.ExtentEntrySize])
    83  		f.root.Entries[i].Entry = curEntry
    84  	}
    85  
    86  	// If this node is internal, perform DFS.
    87  	if f.root.Header.Height > 0 {
    88  		for i := uint16(0); i < f.root.Header.NumEntries; i++ {
    89  			var err error
    90  			if f.root.Entries[i].Node, err = f.buildExtTreeFromDisk(f.root.Entries[i].Entry); err != nil {
    91  				return err
    92  			}
    93  		}
    94  	}
    95  
    96  	return nil
    97  }
    98  
    99  // buildExtTreeFromDisk reads the extent tree nodes from disk and recursively
   100  // builds the tree. Performs a simple DFS. It returns the ExtentNode pointed to
   101  // by the ExtentEntry.
   102  func (f *extentFile) buildExtTreeFromDisk(entry disklayout.ExtentEntry) (*disklayout.ExtentNode, error) {
   103  	var header disklayout.ExtentHeader
   104  	off := entry.PhysicalBlock() * f.regFile.inode.blkSize
   105  	err := readFromDisk(f.regFile.inode.fs.dev, int64(off), &header)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	entries := make([]disklayout.ExtentEntryPair, header.NumEntries)
   111  	for i, off := uint16(0), off+disklayout.ExtentEntrySize; i < header.NumEntries; i, off = i+1, off+disklayout.ExtentEntrySize {
   112  		var curEntry disklayout.ExtentEntry
   113  		if header.Height == 0 {
   114  			// Leaf node.
   115  			curEntry = &disklayout.Extent{}
   116  		} else {
   117  			// Internal node.
   118  			curEntry = &disklayout.ExtentIdx{}
   119  		}
   120  
   121  		err := readFromDisk(f.regFile.inode.fs.dev, int64(off), curEntry)
   122  		if err != nil {
   123  			return nil, err
   124  		}
   125  		entries[i].Entry = curEntry
   126  	}
   127  
   128  	// If this node is internal, perform DFS.
   129  	if header.Height > 0 {
   130  		for i := uint16(0); i < header.NumEntries; i++ {
   131  			var err error
   132  			entries[i].Node, err = f.buildExtTreeFromDisk(entries[i].Entry)
   133  			if err != nil {
   134  				return nil, err
   135  			}
   136  		}
   137  	}
   138  
   139  	return &disklayout.ExtentNode{header, entries}, nil
   140  }
   141  
   142  // ReadAt implements io.ReaderAt.ReadAt.
   143  func (f *extentFile) ReadAt(dst []byte, off int64) (int, error) {
   144  	if len(dst) == 0 {
   145  		return 0, nil
   146  	}
   147  
   148  	if off < 0 {
   149  		return 0, linuxerr.EINVAL
   150  	}
   151  
   152  	if uint64(off) >= f.regFile.inode.diskInode.Size() {
   153  		return 0, io.EOF
   154  	}
   155  
   156  	n, err := f.read(&f.root, uint64(off), dst)
   157  	if n < len(dst) && err == nil {
   158  		err = io.EOF
   159  	}
   160  	return n, err
   161  }
   162  
   163  // read is the recursive step of extentFile.ReadAt which traverses the extent
   164  // tree from the node passed and reads file data.
   165  func (f *extentFile) read(node *disklayout.ExtentNode, off uint64, dst []byte) (int, error) {
   166  	// Perform a binary search for the node covering bytes starting at r.fileOff.
   167  	// A highly fragmented filesystem can have upto 340 entries and so linear
   168  	// search should be avoided. Finds the first entry which does not cover the
   169  	// file block we want and subtracts 1 to get the desired index.
   170  	fileBlk := uint32(off / f.regFile.inode.blkSize)
   171  	n := len(node.Entries)
   172  	found := sort.Search(n, func(i int) bool {
   173  		return node.Entries[i].Entry.FileBlock() > fileBlk
   174  	}) - 1
   175  
   176  	// We should be in this recursive step only if the data we want exists under
   177  	// the current node.
   178  	if found < 0 {
   179  		panic("searching for a file block in an extent entry which does not cover it")
   180  	}
   181  
   182  	read := 0
   183  	toRead := len(dst)
   184  	var curR int
   185  	var err error
   186  	for i := found; i < n && read < toRead; i++ {
   187  		if node.Header.Height == 0 {
   188  			curR, err = f.readFromExtent(node.Entries[i].Entry.(*disklayout.Extent), off, dst[read:])
   189  		} else {
   190  			curR, err = f.read(node.Entries[i].Node, off, dst[read:])
   191  		}
   192  
   193  		read += curR
   194  		off += uint64(curR)
   195  		if err != nil {
   196  			return read, err
   197  		}
   198  	}
   199  
   200  	return read, nil
   201  }
   202  
   203  // readFromExtent reads file data from the extent. It takes advantage of the
   204  // sequential nature of extents and reads file data from multiple blocks in one
   205  // call.
   206  //
   207  // A non-nil error indicates that this is a partial read and there is probably
   208  // more to read from this extent. The caller should propagate the error upward
   209  // and not move to the next extent in the tree.
   210  //
   211  // A subsequent call to extentReader.Read should continue reading from where we
   212  // left off as expected.
   213  func (f *extentFile) readFromExtent(ex *disklayout.Extent, off uint64, dst []byte) (int, error) {
   214  	curFileBlk := uint32(off / f.regFile.inode.blkSize)
   215  	exFirstFileBlk := ex.FileBlock()
   216  	exLastFileBlk := exFirstFileBlk + uint32(ex.Length) // This is exclusive.
   217  
   218  	// We should be in this recursive step only if the data we want exists under
   219  	// the current extent.
   220  	if curFileBlk < exFirstFileBlk || exLastFileBlk <= curFileBlk {
   221  		panic("searching for a file block in an extent which does not cover it")
   222  	}
   223  
   224  	curPhyBlk := uint64(curFileBlk-exFirstFileBlk) + ex.PhysicalBlock()
   225  	readStart := curPhyBlk*f.regFile.inode.blkSize + (off % f.regFile.inode.blkSize)
   226  
   227  	endPhyBlk := ex.PhysicalBlock() + uint64(ex.Length)
   228  	extentEnd := endPhyBlk * f.regFile.inode.blkSize // This is exclusive.
   229  
   230  	toRead := int(extentEnd - readStart)
   231  	if len(dst) < toRead {
   232  		toRead = len(dst)
   233  	}
   234  
   235  	n, _ := f.regFile.inode.fs.dev.ReadAt(dst[:toRead], int64(readStart))
   236  	if n < toRead {
   237  		return n, syserror.EIO
   238  	}
   239  	return n, nil
   240  }