github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_splice.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    20  	"github.com/SagerNet/gvisor/pkg/marshal/primitive"
    21  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    22  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    24  	"github.com/SagerNet/gvisor/pkg/syserror"
    25  	"github.com/SagerNet/gvisor/pkg/waiter"
    26  )
    27  
    28  // doSplice implements a blocking splice operation.
    29  func doSplice(t *kernel.Task, outFile, inFile *fs.File, opts fs.SpliceOpts, nonBlocking bool) (int64, error) {
    30  	if opts.Length < 0 || opts.SrcStart < 0 || opts.DstStart < 0 || (opts.SrcStart+opts.Length < 0) {
    31  		return 0, linuxerr.EINVAL
    32  	}
    33  	if opts.Length == 0 {
    34  		return 0, nil
    35  	}
    36  	if opts.Length > int64(kernel.MAX_RW_COUNT) {
    37  		opts.Length = int64(kernel.MAX_RW_COUNT)
    38  	}
    39  
    40  	var (
    41  		n     int64
    42  		err   error
    43  		inCh  chan struct{}
    44  		outCh chan struct{}
    45  	)
    46  
    47  	for {
    48  		n, err = fs.Splice(t, outFile, inFile, opts)
    49  		if n != 0 || err != syserror.ErrWouldBlock {
    50  			break
    51  		} else if err == syserror.ErrWouldBlock && nonBlocking {
    52  			break
    53  		}
    54  
    55  		// Note that the blocking behavior here is a bit different than the
    56  		// normal pattern. Because we need to have both data to read and data
    57  		// to write simultaneously, we actually explicitly block on both of
    58  		// these cases in turn before returning to the splice operation.
    59  		if inFile.Readiness(EventMaskRead) == 0 {
    60  			if inCh == nil {
    61  				inCh = make(chan struct{}, 1)
    62  				inW, _ := waiter.NewChannelEntry(inCh)
    63  				inFile.EventRegister(&inW, EventMaskRead)
    64  				defer inFile.EventUnregister(&inW)
    65  				// Need to refresh readiness.
    66  				continue
    67  			}
    68  			if err = t.Block(inCh); err != nil {
    69  				break
    70  			}
    71  		}
    72  		// Don't bother checking readiness of the outFile, because it's not a
    73  		// guarantee that it won't return EWOULDBLOCK. Both pipes and eventfds
    74  		// can be "ready" but will reject writes of certain sizes with
    75  		// EWOULDBLOCK.
    76  		if outCh == nil {
    77  			outCh = make(chan struct{}, 1)
    78  			outW, _ := waiter.NewChannelEntry(outCh)
    79  			outFile.EventRegister(&outW, EventMaskWrite)
    80  			defer outFile.EventUnregister(&outW)
    81  			// We might be ready to write now. Try again before
    82  			// blocking.
    83  			continue
    84  		}
    85  		if err = t.Block(outCh); err != nil {
    86  			break
    87  		}
    88  	}
    89  
    90  	if n > 0 {
    91  		// On Linux, inotify behavior is not very consistent with splice(2). We try
    92  		// our best to emulate Linux for very basic calls to splice, where for some
    93  		// reason, events are generated for output files, but not input files.
    94  		outFile.Dirent.InotifyEvent(linux.IN_MODIFY, 0)
    95  	}
    96  	return n, err
    97  }
    98  
    99  // Sendfile implements linux system call sendfile(2).
   100  func Sendfile(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   101  	outFD := args[0].Int()
   102  	inFD := args[1].Int()
   103  	offsetAddr := args[2].Pointer()
   104  	count := int64(args[3].SizeT())
   105  
   106  	// Get files.
   107  	inFile := t.GetFile(inFD)
   108  	if inFile == nil {
   109  		return 0, nil, linuxerr.EBADF
   110  	}
   111  	defer inFile.DecRef(t)
   112  
   113  	if !inFile.Flags().Read {
   114  		return 0, nil, linuxerr.EBADF
   115  	}
   116  
   117  	outFile := t.GetFile(outFD)
   118  	if outFile == nil {
   119  		return 0, nil, linuxerr.EBADF
   120  	}
   121  	defer outFile.DecRef(t)
   122  
   123  	if !outFile.Flags().Write {
   124  		return 0, nil, linuxerr.EBADF
   125  	}
   126  
   127  	// Verify that the outfile Append flag is not set.
   128  	if outFile.Flags().Append {
   129  		return 0, nil, linuxerr.EINVAL
   130  	}
   131  
   132  	// Verify that we have a regular infile. This is a requirement; the
   133  	// same check appears in Linux (fs/splice.c:splice_direct_to_actor).
   134  	if !fs.IsRegular(inFile.Dirent.Inode.StableAttr) {
   135  		return 0, nil, linuxerr.EINVAL
   136  	}
   137  
   138  	var (
   139  		n   int64
   140  		err error
   141  	)
   142  	if offsetAddr != 0 {
   143  		// Verify that when offset address is not null, infile must be
   144  		// seekable. The fs.Splice routine itself validates basic read.
   145  		if !inFile.Flags().Pread {
   146  			return 0, nil, linuxerr.ESPIPE
   147  		}
   148  
   149  		// Copy in the offset.
   150  		var offset int64
   151  		if _, err := primitive.CopyInt64In(t, offsetAddr, &offset); err != nil {
   152  			return 0, nil, err
   153  		}
   154  
   155  		// Do the splice.
   156  		n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{
   157  			Length:    count,
   158  			SrcOffset: true,
   159  			SrcStart:  int64(offset),
   160  		}, outFile.Flags().NonBlocking)
   161  
   162  		// Copy out the new offset.
   163  		if _, err := primitive.CopyInt64Out(t, offsetAddr, offset+n); err != nil {
   164  			return 0, nil, err
   165  		}
   166  	} else {
   167  		// Send data using splice.
   168  		n, err = doSplice(t, outFile, inFile, fs.SpliceOpts{
   169  			Length: count,
   170  		}, outFile.Flags().NonBlocking)
   171  	}
   172  
   173  	// Sendfile can't lose any data because inFD is always a regual file.
   174  	if n != 0 {
   175  		err = nil
   176  	}
   177  
   178  	// We can only pass a single file to handleIOError, so pick inFile
   179  	// arbitrarily. This is used only for debugging purposes.
   180  	return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "sendfile", inFile)
   181  }
   182  
   183  // Splice implements splice(2).
   184  func Splice(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   185  	inFD := args[0].Int()
   186  	inOffset := args[1].Pointer()
   187  	outFD := args[2].Int()
   188  	outOffset := args[3].Pointer()
   189  	count := int64(args[4].SizeT())
   190  	flags := args[5].Int()
   191  
   192  	// Check for invalid flags.
   193  	if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 {
   194  		return 0, nil, linuxerr.EINVAL
   195  	}
   196  
   197  	// Get files.
   198  	outFile := t.GetFile(outFD)
   199  	if outFile == nil {
   200  		return 0, nil, linuxerr.EBADF
   201  	}
   202  	defer outFile.DecRef(t)
   203  
   204  	inFile := t.GetFile(inFD)
   205  	if inFile == nil {
   206  		return 0, nil, linuxerr.EBADF
   207  	}
   208  	defer inFile.DecRef(t)
   209  
   210  	// The operation is non-blocking if anything is non-blocking.
   211  	//
   212  	// N.B. This is a rather simplistic heuristic that avoids some
   213  	// poor edge case behavior since the exact semantics here are
   214  	// underspecified and vary between versions of Linux itself.
   215  	nonBlock := inFile.Flags().NonBlocking || outFile.Flags().NonBlocking || (flags&linux.SPLICE_F_NONBLOCK != 0)
   216  
   217  	// Construct our options.
   218  	//
   219  	// Note that exactly one of the underlying buffers must be a pipe. We
   220  	// don't actually have this constraint internally, but we enforce it
   221  	// for the semantics of the call.
   222  	opts := fs.SpliceOpts{
   223  		Length: count,
   224  	}
   225  	inFileAttr := inFile.Dirent.Inode.StableAttr
   226  	outFileAttr := outFile.Dirent.Inode.StableAttr
   227  	switch {
   228  	case fs.IsPipe(inFileAttr) && !fs.IsPipe(outFileAttr):
   229  		if inOffset != 0 {
   230  			return 0, nil, linuxerr.ESPIPE
   231  		}
   232  		if outOffset != 0 {
   233  			if !outFile.Flags().Pwrite {
   234  				return 0, nil, linuxerr.EINVAL
   235  			}
   236  
   237  			var offset int64
   238  			if _, err := primitive.CopyInt64In(t, outOffset, &offset); err != nil {
   239  				return 0, nil, err
   240  			}
   241  
   242  			// Use the destination offset.
   243  			opts.DstOffset = true
   244  			opts.DstStart = offset
   245  		}
   246  	case !fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr):
   247  		if outOffset != 0 {
   248  			return 0, nil, linuxerr.ESPIPE
   249  		}
   250  		if inOffset != 0 {
   251  			if !inFile.Flags().Pread {
   252  				return 0, nil, linuxerr.EINVAL
   253  			}
   254  
   255  			var offset int64
   256  			if _, err := primitive.CopyInt64In(t, inOffset, &offset); err != nil {
   257  				return 0, nil, err
   258  			}
   259  
   260  			// Use the source offset.
   261  			opts.SrcOffset = true
   262  			opts.SrcStart = offset
   263  		}
   264  	case fs.IsPipe(inFileAttr) && fs.IsPipe(outFileAttr):
   265  		if inOffset != 0 || outOffset != 0 {
   266  			return 0, nil, linuxerr.ESPIPE
   267  		}
   268  
   269  		// We may not refer to the same pipe; otherwise it's a continuous loop.
   270  		if inFileAttr.InodeID == outFileAttr.InodeID {
   271  			return 0, nil, linuxerr.EINVAL
   272  		}
   273  	default:
   274  		return 0, nil, linuxerr.EINVAL
   275  	}
   276  
   277  	// Splice data.
   278  	n, err := doSplice(t, outFile, inFile, opts, nonBlock)
   279  
   280  	// Special files can have additional requirements for granularity.  For
   281  	// example, read from eventfd returns EINVAL if a size is less 8 bytes.
   282  	// Inotify is another example. read will return EINVAL is a buffer is
   283  	// too small to return the next event, but a size of an event isn't
   284  	// fixed, it is sizeof(struct inotify_event) + {NAME_LEN} + 1.
   285  	if n != 0 && err != nil && (fs.IsAnonymous(inFileAttr) || fs.IsAnonymous(outFileAttr)) {
   286  		err = nil
   287  	}
   288  
   289  	// See above; inFile is chosen arbitrarily here.
   290  	return uintptr(n), nil, handleIOError(t, n != 0, err, syserror.ERESTARTSYS, "splice", inFile)
   291  }
   292  
   293  // Tee imlements tee(2).
   294  func Tee(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   295  	inFD := args[0].Int()
   296  	outFD := args[1].Int()
   297  	count := int64(args[2].SizeT())
   298  	flags := args[3].Int()
   299  
   300  	// Check for invalid flags.
   301  	if flags&^(linux.SPLICE_F_MOVE|linux.SPLICE_F_NONBLOCK|linux.SPLICE_F_MORE|linux.SPLICE_F_GIFT) != 0 {
   302  		return 0, nil, linuxerr.EINVAL
   303  	}
   304  
   305  	// Get files.
   306  	outFile := t.GetFile(outFD)
   307  	if outFile == nil {
   308  		return 0, nil, linuxerr.EBADF
   309  	}
   310  	defer outFile.DecRef(t)
   311  
   312  	inFile := t.GetFile(inFD)
   313  	if inFile == nil {
   314  		return 0, nil, linuxerr.EBADF
   315  	}
   316  	defer inFile.DecRef(t)
   317  
   318  	// All files must be pipes.
   319  	if !fs.IsPipe(inFile.Dirent.Inode.StableAttr) || !fs.IsPipe(outFile.Dirent.Inode.StableAttr) {
   320  		return 0, nil, linuxerr.EINVAL
   321  	}
   322  
   323  	// We may not refer to the same pipe; see above.
   324  	if inFile.Dirent.Inode.StableAttr.InodeID == outFile.Dirent.Inode.StableAttr.InodeID {
   325  		return 0, nil, linuxerr.EINVAL
   326  	}
   327  
   328  	// The operation is non-blocking if anything is non-blocking.
   329  	nonBlock := inFile.Flags().NonBlocking || outFile.Flags().NonBlocking || (flags&linux.SPLICE_F_NONBLOCK != 0)
   330  
   331  	// Splice data.
   332  	n, err := doSplice(t, outFile, inFile, fs.SpliceOpts{
   333  		Length: count,
   334  		Dup:    true,
   335  	}, nonBlock)
   336  
   337  	// Tee doesn't change a state of inFD, so it can't lose any data.
   338  	if n != 0 {
   339  		err = nil
   340  	}
   341  
   342  	// See above; inFile is chosen arbitrarily here.
   343  	return uintptr(n), nil, handleIOError(t, false, err, syserror.ERESTARTSYS, "tee", inFile)
   344  }