github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_write.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"time"
    19  
    20  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    21  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    22  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    25  	ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/socket"
    27  	"github.com/SagerNet/gvisor/pkg/syserror"
    28  	"github.com/SagerNet/gvisor/pkg/usermem"
    29  	"github.com/SagerNet/gvisor/pkg/waiter"
    30  )
    31  
    32  // LINT.IfChange
    33  
    34  const (
    35  	// EventMaskWrite contains events that can be triggered on writes.
    36  	//
    37  	// Note that EventHUp is not going to happen for pipes but may for
    38  	// implementations of poll on some sockets, see net/core/datagram.c.
    39  	EventMaskWrite = waiter.EventOut | waiter.EventHUp | waiter.EventErr
    40  )
    41  
    42  // Write implements linux syscall write(2).
    43  func Write(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    44  	fd := args[0].Int()
    45  	addr := args[1].Pointer()
    46  	size := args[2].SizeT()
    47  
    48  	file := t.GetFile(fd)
    49  	if file == nil {
    50  		return 0, nil, linuxerr.EBADF
    51  	}
    52  	defer file.DecRef(t)
    53  
    54  	// Check that the file is writable.
    55  	if !file.Flags().Write {
    56  		return 0, nil, linuxerr.EBADF
    57  	}
    58  
    59  	// Check that the size is legitimate.
    60  	si := int(size)
    61  	if si < 0 {
    62  		return 0, nil, linuxerr.EINVAL
    63  	}
    64  
    65  	// Get the source of the write.
    66  	src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
    67  		AddressSpaceActive: true,
    68  	})
    69  	if err != nil {
    70  		return 0, nil, err
    71  	}
    72  
    73  	n, err := writev(t, file, src)
    74  	t.IOUsage().AccountWriteSyscall(n)
    75  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "write", file)
    76  }
    77  
    78  // Pwrite64 implements linux syscall pwrite64(2).
    79  func Pwrite64(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    80  	fd := args[0].Int()
    81  	addr := args[1].Pointer()
    82  	size := args[2].SizeT()
    83  	offset := args[3].Int64()
    84  
    85  	file := t.GetFile(fd)
    86  	if file == nil {
    87  		return 0, nil, linuxerr.EBADF
    88  	}
    89  	defer file.DecRef(t)
    90  
    91  	// Check that the offset is legitimate and does not overflow.
    92  	if offset < 0 || offset+int64(size) < 0 {
    93  		return 0, nil, linuxerr.EINVAL
    94  	}
    95  
    96  	// Is writing at an offset supported?
    97  	if !file.Flags().Pwrite {
    98  		return 0, nil, linuxerr.ESPIPE
    99  	}
   100  
   101  	// Check that the file is writable.
   102  	if !file.Flags().Write {
   103  		return 0, nil, linuxerr.EBADF
   104  	}
   105  
   106  	// Check that the size is legitimate.
   107  	si := int(size)
   108  	if si < 0 {
   109  		return 0, nil, linuxerr.EINVAL
   110  	}
   111  
   112  	// Get the source of the write.
   113  	src, err := t.SingleIOSequence(addr, si, usermem.IOOpts{
   114  		AddressSpaceActive: true,
   115  	})
   116  	if err != nil {
   117  		return 0, nil, err
   118  	}
   119  
   120  	n, err := pwritev(t, file, src, offset)
   121  	t.IOUsage().AccountWriteSyscall(n)
   122  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwrite64", file)
   123  }
   124  
   125  // Writev implements linux syscall writev(2).
   126  func Writev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   127  	fd := args[0].Int()
   128  	addr := args[1].Pointer()
   129  	iovcnt := int(args[2].Int())
   130  
   131  	file := t.GetFile(fd)
   132  	if file == nil {
   133  		return 0, nil, linuxerr.EBADF
   134  	}
   135  	defer file.DecRef(t)
   136  
   137  	// Check that the file is writable.
   138  	if !file.Flags().Write {
   139  		return 0, nil, linuxerr.EBADF
   140  	}
   141  
   142  	// Read the iovecs that specify the source of the write.
   143  	src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
   144  		AddressSpaceActive: true,
   145  	})
   146  	if err != nil {
   147  		return 0, nil, err
   148  	}
   149  
   150  	n, err := writev(t, file, src)
   151  	t.IOUsage().AccountWriteSyscall(n)
   152  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "writev", file)
   153  }
   154  
   155  // Pwritev implements linux syscall pwritev(2).
   156  func Pwritev(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   157  	fd := args[0].Int()
   158  	addr := args[1].Pointer()
   159  	iovcnt := int(args[2].Int())
   160  	offset := args[3].Int64()
   161  
   162  	file := t.GetFile(fd)
   163  	if file == nil {
   164  		return 0, nil, linuxerr.EBADF
   165  	}
   166  	defer file.DecRef(t)
   167  
   168  	// Check that the offset is legitimate.
   169  	if offset < 0 {
   170  		return 0, nil, linuxerr.EINVAL
   171  	}
   172  
   173  	// Is writing at an offset supported?
   174  	if !file.Flags().Pwrite {
   175  		return 0, nil, linuxerr.ESPIPE
   176  	}
   177  
   178  	// Check that the file is writable.
   179  	if !file.Flags().Write {
   180  		return 0, nil, linuxerr.EBADF
   181  	}
   182  
   183  	// Read the iovecs that specify the source of the write.
   184  	src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
   185  		AddressSpaceActive: true,
   186  	})
   187  	if err != nil {
   188  		return 0, nil, err
   189  	}
   190  
   191  	n, err := pwritev(t, file, src, offset)
   192  	t.IOUsage().AccountWriteSyscall(n)
   193  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev", file)
   194  }
   195  
   196  // Pwritev2 implements linux syscall pwritev2(2).
   197  func Pwritev2(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   198  	// While the syscall is
   199  	// pwritev2(int fd, struct iovec* iov, int iov_cnt, off_t offset, int flags)
   200  	// the linux internal call
   201  	// (https://elixir.bootlin.com/linux/v4.18/source/fs/read_write.c#L1354)
   202  	// splits the offset argument into a high/low value for compatibility with
   203  	// 32-bit architectures. The flags argument is the 5th argument.
   204  
   205  	fd := args[0].Int()
   206  	addr := args[1].Pointer()
   207  	iovcnt := int(args[2].Int())
   208  	offset := args[3].Int64()
   209  	flags := int(args[5].Int())
   210  
   211  	if int(args[4].Int())&0x4 == 1 {
   212  		return 0, nil, linuxerr.EACCES
   213  	}
   214  
   215  	file := t.GetFile(fd)
   216  	if file == nil {
   217  		return 0, nil, linuxerr.EBADF
   218  	}
   219  	defer file.DecRef(t)
   220  
   221  	// Check that the offset is legitimate.
   222  	if offset < -1 {
   223  		return 0, nil, linuxerr.EINVAL
   224  	}
   225  
   226  	// Is writing at an offset supported?
   227  	if offset > -1 && !file.Flags().Pwrite {
   228  		return 0, nil, linuxerr.ESPIPE
   229  	}
   230  
   231  	// Note: gVisor does not implement the RWF_HIPRI feature, but the flag is
   232  	// accepted as a valid flag argument for pwritev2.
   233  	if flags&^linux.RWF_VALID != 0 {
   234  		return uintptr(flags), nil, syserror.EOPNOTSUPP
   235  	}
   236  
   237  	// Check that the file is writeable.
   238  	if !file.Flags().Write {
   239  		return 0, nil, linuxerr.EBADF
   240  	}
   241  
   242  	// Read the iovecs that specify the source of the write.
   243  	src, err := t.IovecsIOSequence(addr, iovcnt, usermem.IOOpts{
   244  		AddressSpaceActive: true,
   245  	})
   246  	if err != nil {
   247  		return 0, nil, err
   248  	}
   249  
   250  	// If pwritev2 is called with an offset of -1, writev is called.
   251  	if offset == -1 {
   252  		n, err := writev(t, file, src)
   253  		t.IOUsage().AccountWriteSyscall(n)
   254  		return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
   255  	}
   256  
   257  	n, err := pwritev(t, file, src, offset)
   258  	t.IOUsage().AccountWriteSyscall(n)
   259  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "pwritev2", file)
   260  }
   261  
   262  func writev(t *kernel.Task, f *fs.File, src usermem.IOSequence) (int64, error) {
   263  	n, err := f.Writev(t, src)
   264  	if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
   265  		if n > 0 {
   266  			// Queue notification if we wrote anything.
   267  			f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
   268  		}
   269  		return n, err
   270  	}
   271  
   272  	// Sockets support write timeouts.
   273  	var haveDeadline bool
   274  	var deadline ktime.Time
   275  	if s, ok := f.FileOperations.(socket.Socket); ok {
   276  		dl := s.SendTimeout()
   277  		if dl < 0 && err == syserror.ErrWouldBlock {
   278  			return n, err
   279  		}
   280  		if dl > 0 {
   281  			deadline = t.Kernel().MonotonicClock().Now().Add(time.Duration(dl) * time.Nanosecond)
   282  			haveDeadline = true
   283  		}
   284  	}
   285  
   286  	// Register for notifications.
   287  	w, ch := waiter.NewChannelEntry(nil)
   288  	f.EventRegister(&w, EventMaskWrite)
   289  
   290  	total := n
   291  	for {
   292  		// Shorten src to reflect bytes previously written.
   293  		src = src.DropFirst64(n)
   294  
   295  		// Issue the request and break out if it completes with
   296  		// anything other than "would block".
   297  		n, err = f.Writev(t, src)
   298  		total += n
   299  		if err != syserror.ErrWouldBlock {
   300  			break
   301  		}
   302  
   303  		// Wait for a notification that we should retry.
   304  		if err = t.BlockWithDeadline(ch, haveDeadline, deadline); err != nil {
   305  			if linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
   306  				err = syserror.ErrWouldBlock
   307  			}
   308  			break
   309  		}
   310  	}
   311  
   312  	f.EventUnregister(&w)
   313  
   314  	if total > 0 {
   315  		// Queue notification if we wrote anything.
   316  		f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
   317  	}
   318  
   319  	return total, err
   320  }
   321  
   322  func pwritev(t *kernel.Task, f *fs.File, src usermem.IOSequence, offset int64) (int64, error) {
   323  	n, err := f.Pwritev(t, src, offset)
   324  	if err != syserror.ErrWouldBlock || f.Flags().NonBlocking {
   325  		if n > 0 {
   326  			// Queue notification if we wrote anything.
   327  			f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
   328  		}
   329  		return n, err
   330  	}
   331  
   332  	// Register for notifications.
   333  	w, ch := waiter.NewChannelEntry(nil)
   334  	f.EventRegister(&w, EventMaskWrite)
   335  
   336  	total := n
   337  	for {
   338  		// Shorten src to reflect bytes previously written.
   339  		src = src.DropFirst64(n)
   340  
   341  		// Issue the request and break out if it completes with
   342  		// anything other than "would block".
   343  		n, err = f.Pwritev(t, src, offset+total)
   344  		total += n
   345  		if err != syserror.ErrWouldBlock {
   346  			break
   347  		}
   348  
   349  		// Wait for a notification that we should retry.
   350  		if err = t.Block(ch); err != nil {
   351  			break
   352  		}
   353  	}
   354  
   355  	f.EventUnregister(&w)
   356  
   357  	if total > 0 {
   358  		// Queue notification if we wrote anything.
   359  		f.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
   360  	}
   361  
   362  	return total, err
   363  }
   364  
   365  // LINT.ThenChange(vfs2/read_write.go)