github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_read.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"time"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    21  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    22  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    25  	ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/socket"
    27  	"github.com/SagerNet/gvisor/pkg/syserror"
    28  	"github.com/SagerNet/gvisor/pkg/usermem"
    29  	"github.com/SagerNet/gvisor/pkg/waiter"
    30  )
    31  
    32  // LINT.IfChange
    33  
    34  const (
    35  	// EventMaskRead contains events that can be triggered on reads.
    36  	EventMaskRead = waiter.ReadableEvents | waiter.EventHUp | waiter.EventErr
    37  )
    38  
    39  // Read implements linux syscall read(2).  Note that we try to get a buffer that
    40  // is exactly the size requested because some applications like qemu expect
    41  // they can do large reads all at once.  Bug for bug.  Same for other read
    42  // calls below.
    43  func Read(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    44  	fd := args[0].Int()
    45  	addr := args[1].Pointer()
    46  	size := args[2].SizeT()
    47  
    48  	file := t.GetFile(fd)
    49  	if file == nil {
    50  		return 0, nil, linuxerr.EBADF
    51  	}
    52  	defer file.DecRef(t)
    53  
    54  	// Check that the file is readable.
    55  	if !file.Flags().Read {
    56  		return 0, nil, linuxerr.EBADF
    57  	}
    58  
    59  	// Check that the size is legitimate.
    60  	si := int(size)
    61  	if si < 0 {
    62  		return 0, nil, linuxerr.EINVAL
    63  	}
    64  
    65  	// Get the destination of the read.
    66  	dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
    67  		AddressSpaceActive: true,
    68  	})
    69  	if err != nil {
    70  		return 0, nil, err
    71  	}
    72  
    73  	n, err := readv(t, file, dst)
    74  	t.IOUsage().AccountReadSyscall(n)
    75  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "read", file)
    76  }
    77  
    78  // Readahead implements readahead(2).
    79  func Readahead(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    80  	fd := args[0].Int()
    81  	offset := args[1].Int64()
    82  	size := args[2].SizeT()
    83  
    84  	file := t.GetFile(fd)
    85  	if file == nil {
    86  		return 0, nil, linuxerr.EBADF
    87  	}
    88  	defer file.DecRef(t)
    89  
    90  	// Check that the file is readable.
    91  	if !file.Flags().Read {
    92  		return 0, nil, linuxerr.EBADF
    93  	}
    94  
    95  	// Check that the size is valid.
    96  	if int(size) < 0 {
    97  		return 0, nil, linuxerr.EINVAL
    98  	}
    99  
   100  	// Check that the offset is legitimate and does not overflow.
   101  	if offset < 0 || offset+int64(size) < 0 {
   102  		return 0, nil, linuxerr.EINVAL
   103  	}
   104  
   105  	// Return EINVAL; if the underlying file type does not support readahead,
   106  	// then Linux will return EINVAL to indicate as much. In the future, we
   107  	// may extend this function to actually support readahead hints.
   108  	return 0, nil, linuxerr.EINVAL
   109  }
   110  
   111  // Pread64 implements linux syscall pread64(2).
   112  func Pread64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   113  	fd := args[0].Int()
   114  	addr := args[1].Pointer()
   115  	size := args[2].SizeT()
   116  	offset := args[3].Int64()
   117  
   118  	file := t.GetFile(fd)
   119  	if file == nil {
   120  		return 0, nil, linuxerr.EBADF
   121  	}
   122  	defer file.DecRef(t)
   123  
   124  	// Check that the offset is legitimate and does not overflow.
   125  	if offset < 0 || offset+int64(size) < 0 {
   126  		return 0, nil, linuxerr.EINVAL
   127  	}
   128  
   129  	// Is reading at an offset supported?
   130  	if !file.Flags().Pread {
   131  		return 0, nil, linuxerr.ESPIPE
   132  	}
   133  
   134  	// Check that the file is readable.
   135  	if !file.Flags().Read {
   136  		return 0, nil, linuxerr.EBADF
   137  	}
   138  
   139  	// Check that the size is legitimate.
   140  	si := int(size)
   141  	if si < 0 {
   142  		return 0, nil, linuxerr.EINVAL
   143  	}
   144  
   145  	// Get the destination of the read.
   146  	dst, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
   147  		AddressSpaceActive: true,
   148  	})
   149  	if err != nil {
   150  		return 0, nil, err
   151  	}
   152  
   153  	n, err := preadv(t, file, dst, offset)
   154  	t.IOUsage().AccountReadSyscall(n)
   155  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pread64", file)
   156  }
   157  
   158  // Readv implements linux syscall readv(2).
   159  func Readv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   160  	fd := args[0].Int()
   161  	addr := args[1].Pointer()
   162  	iovcnt := int(args[2].Int())
   163  
   164  	file := t.GetFile(fd)
   165  	if file == nil {
   166  		return 0, nil, linuxerr.EBADF
   167  	}
   168  	defer file.DecRef(t)
   169  
   170  	// Check that the file is readable.
   171  	if !file.Flags().Read {
   172  		return 0, nil, linuxerr.EBADF
   173  	}
   174  
   175  	// Read the iovecs that specify the destination of the read.
   176  	dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
   177  		AddressSpaceActive: true,
   178  	})
   179  	if err != nil {
   180  		return 0, nil, err
   181  	}
   182  
   183  	n, err := readv(t, file, dst)
   184  	t.IOUsage().AccountReadSyscall(n)
   185  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "readv", file)
   186  }
   187  
   188  // Preadv implements linux syscall preadv(2).
   189  func Preadv(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   190  	fd := args[0].Int()
   191  	addr := args[1].Pointer()
   192  	iovcnt := int(args[2].Int())
   193  	offset := args[3].Int64()
   194  
   195  	file := t.GetFile(fd)
   196  	if file == nil {
   197  		return 0, nil, linuxerr.EBADF
   198  	}
   199  	defer file.DecRef(t)
   200  
   201  	// Check that the offset is legitimate.
   202  	if offset < 0 {
   203  		return 0, nil, linuxerr.EINVAL
   204  	}
   205  
   206  	// Is reading at an offset supported?
   207  	if !file.Flags().Pread {
   208  		return 0, nil, linuxerr.ESPIPE
   209  	}
   210  
   211  	// Check that the file is readable.
   212  	if !file.Flags().Read {
   213  		return 0, nil, linuxerr.EBADF
   214  	}
   215  
   216  	// Read the iovecs that specify the destination of the read.
   217  	dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
   218  		AddressSpaceActive: true,
   219  	})
   220  	if err != nil {
   221  		return 0, nil, err
   222  	}
   223  
   224  	n, err := preadv(t, file, dst, offset)
   225  	t.IOUsage().AccountReadSyscall(n)
   226  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv", file)
   227  }
   228  
   229  // Preadv2 implements linux syscall preadv2(2).
   230  func Preadv2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   231  	// While the syscall is
   232  	// preadv2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags)
   233  	// the linux internal call
   234  	// (https://elixir.bootlin.com/linux/v4.18/source/fs/read_write.c#L1248)
   235  	// splits the offset argument into a high/low value for compatibility with
   236  	// 32-bit architectures. The flags argument is the 5th argument.
   237  
   238  	fd := args[0].Int()
   239  	addr := args[1].Pointer()
   240  	iovcnt := int(args[2].Int())
   241  	offset := args[3].Int64()
   242  	flags := int(args[5].Int())
   243  
   244  	file := t.GetFile(fd)
   245  	if file == nil {
   246  		return 0, nil, linuxerr.EBADF
   247  	}
   248  	defer file.DecRef(t)
   249  
   250  	// Check that the offset is legitimate.
   251  	if offset < -1 {
   252  		return 0, nil, linuxerr.EINVAL
   253  	}
   254  
   255  	// Is reading at an offset supported?
   256  	if offset > -1 && !file.Flags().Pread {
   257  		return 0, nil, linuxerr.ESPIPE
   258  	}
   259  
   260  	// Check that the file is readable.
   261  	if !file.Flags().Read {
   262  		return 0, nil, linuxerr.EBADF
   263  	}
   264  
   265  	// Check flags field.
   266  	// Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
   267  	// accepted as a valid flag argument for preadv2.
   268  	if flags&^linux.RWF_VALID != 0 {
   269  		return 0, nil, syserror.EOPNOTSUPP
   270  	}
   271  
   272  	// Read the iovecs that specify the destination of the read.
   273  	dst, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
   274  		AddressSpaceActive: true,
   275  	})
   276  	if err != nil {
   277  		return 0, nil, err
   278  	}
   279  
   280  	// If preadv2 is called with an offset of -1, readv is called.
   281  	if offset == -1 {
   282  		n, err := readv(t, file, dst)
   283  		t.IOUsage().AccountReadSyscall(n)
   284  		return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
   285  	}
   286  
   287  	n, err := preadv(t, file, dst, offset)
   288  	t.IOUsage().AccountReadSyscall(n)
   289  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "preadv2", file)
   290  }
   291  
   292  func readv(t *kernel.Task, f *fs.File, dst usermem.IOSequence) (int64, error) {
   293  	n, err := f.Readv(t, dst)
   294  	if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
   295  		if n > 0 {
   296  			// Queue notification if we read anything.
   297  			f.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
   298  		}
   299  		return n, err
   300  	}
   301  
   302  	// Sockets support read timeouts.
   303  	var haveDeadline bool
   304  	var deadline ktime.Time
   305  	if s, ok := f.FileOperations.(socket.Socket); ok {
   306  		dl := s.RecvTimeout()
   307  		if dl < 0 && err == syserror.ErrWouldBlock {
   308  			return n, err
   309  		}
   310  		if dl > 0 {
   311  			deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
   312  			haveDeadline = true
   313  		}
   314  	}
   315  
   316  	// Register for notifications.
   317  	w, ch := waiter.NewChannelEntry(nil)
   318  	f.EventRegister(&w, EventMaskRead)
   319  
   320  	total := n
   321  	for {
   322  		// Shorten dst to reflect bytes previously read.
   323  		dst = dst.DropFirst64(n)
   324  
   325  		// Issue the request and break out if it completes with anything
   326  		// other than "would block".
   327  		n, err = f.Readv(t, dst)
   328  		total += n
   329  		if err != syserror.ErrWouldBlock {
   330  			break
   331  		}
   332  
   333  		// Wait for a notification that we should retry.
   334  		if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
   335  			if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
   336  				err = syserror.ErrWouldBlock
   337  			}
   338  			break
   339  		}
   340  	}
   341  
   342  	f.EventUnregister(&w)
   343  
   344  	if total > 0 {
   345  		// Queue notification if we read anything.
   346  		f.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
   347  	}
   348  
   349  	return total, err
   350  }
   351  
   352  func preadv(t *kernel.Task, f *fs.File, dst usermem.IOSequence, offset int64) (int64, error) {
   353  	n, err := f.Preadv(t, dst, offset)
   354  	if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
   355  		if n > 0 {
   356  			// Queue notification if we read anything.
   357  			f.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
   358  		}
   359  		return n, err
   360  	}
   361  
   362  	// Register for notifications.
   363  	w, ch := waiter.NewChannelEntry(nil)
   364  	f.EventRegister(&w, EventMaskRead)
   365  
   366  	total := n
   367  	for {
   368  		// Shorten dst to reflect bytes previously read.
   369  		dst = dst.DropFirst64(n)
   370  
   371  		// Issue the request and break out if it completes with anything
   372  		// other than "would block".
   373  		n, err = f.Preadv(t, dst, offset+total)
   374  		total += n
   375  		if err != syserror.ErrWouldBlock {
   376  			break
   377  		}
   378  
   379  		// Wait for a notification that we should retry.
   380  		if err = t.Block(ch); err != nil {
   381  			break
   382  		}
   383  	}
   384  
   385  	f.EventUnregister(&w)
   386  
   387  	if total > 0 {
   388  		// Queue notification if we read anything.
   389  		f.Dirent.InotifyEvent(linux.IN_ACCESS, 0)
   390  	}
   391  
   392  	return total, err
   393  }
   394  
   395  // LINT.ThenChange(vfs2/read_write.go)