github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/splice.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fs
    16  
    17  import (
    18  	"io"
    19  	"sync/atomic"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/context"
    22  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    23  	"github.com/SagerNet/gvisor/pkg/syserror"
    24  )
    25  
    26  // Splice moves data to this file, directly from another.
    27  //
    28  // Offsets are updated only if DstOffset and SrcOffset are set.
    29  func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, error) {
    30  	// Verify basic file flag permissions.
    31  	if !dst.Flags().Write || !src.Flags().Read {
    32  		return 0, linuxerr.EBADF
    33  	}
    34  
    35  	// Check whether or not the objects being sliced are stream-oriented
    36  	// (i.e. pipes or sockets). For all stream-oriented files and files
    37  	// where a specific offiset is not request, we acquire the file mutex.
    38  	// This has two important side effects. First, it provides the standard
    39  	// protection against concurrent writes that would mutate the offset.
    40  	// Second, it prevents Splice deadlocks. Only internal anonymous files
    41  	// implement the ReadFrom and WriteTo methods directly, and since such
    42  	// anonymous files are referred to by a unique fs.File object, we know
    43  	// that the file mutex takes strict precedence over internal locks.
    44  	// Since we enforce lock ordering here, we can't deadlock by using
    45  	// using a file in two different splice operations simultaneously.
    46  	srcPipe := !IsRegular(src.Dirent.Inode.StableAttr)
    47  	dstPipe := !IsRegular(dst.Dirent.Inode.StableAttr)
    48  	dstAppend := !dstPipe && dst.Flags().Append
    49  	srcLock := srcPipe || !opts.SrcOffset
    50  	dstLock := dstPipe || !opts.DstOffset || dstAppend
    51  
    52  	switch {
    53  	case srcLock && dstLock:
    54  		switch {
    55  		case dst.UniqueID < src.UniqueID:
    56  			// Acquire dst first.
    57  			if !dst.mu.Lock(ctx) {
    58  				return 0, syserror.ErrInterrupted
    59  			}
    60  			if !src.mu.Lock(ctx) {
    61  				dst.mu.Unlock()
    62  				return 0, syserror.ErrInterrupted
    63  			}
    64  		case dst.UniqueID > src.UniqueID:
    65  			// Acquire src first.
    66  			if !src.mu.Lock(ctx) {
    67  				return 0, syserror.ErrInterrupted
    68  			}
    69  			if !dst.mu.Lock(ctx) {
    70  				src.mu.Unlock()
    71  				return 0, syserror.ErrInterrupted
    72  			}
    73  		case dst.UniqueID == src.UniqueID:
    74  			// Acquire only one lock; it's the same file. This is a
    75  			// bit of a edge case, but presumably it's possible.
    76  			if !dst.mu.Lock(ctx) {
    77  				return 0, syserror.ErrInterrupted
    78  			}
    79  			srcLock = false // Only need one unlock.
    80  		}
    81  		// Use both offsets (locked).
    82  		opts.DstStart = dst.offset
    83  		opts.SrcStart = src.offset
    84  	case dstLock:
    85  		// Acquire only dst.
    86  		if !dst.mu.Lock(ctx) {
    87  			return 0, syserror.ErrInterrupted
    88  		}
    89  		opts.DstStart = dst.offset // Safe: locked.
    90  	case srcLock:
    91  		// Acquire only src.
    92  		if !src.mu.Lock(ctx) {
    93  			return 0, syserror.ErrInterrupted
    94  		}
    95  		opts.SrcStart = src.offset // Safe: locked.
    96  	}
    97  
    98  	var err error
    99  	if dstAppend {
   100  		unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
   101  		defer unlock()
   102  
   103  		// Figure out the appropriate offset to use.
   104  		err = dst.offsetForAppend(ctx, &opts.DstStart)
   105  	}
   106  	if err == nil && !dstPipe {
   107  		// Enforce file limits.
   108  		limit, ok := dst.checkLimit(ctx, opts.DstStart)
   109  		switch {
   110  		case ok && limit == 0:
   111  			err = syserror.ErrExceedsFileSizeLimit
   112  		case ok && limit < opts.Length:
   113  			opts.Length = limit // Cap the write.
   114  		}
   115  	}
   116  	if err != nil {
   117  		if dstLock {
   118  			dst.mu.Unlock()
   119  		}
   120  		if srcLock {
   121  			src.mu.Unlock()
   122  		}
   123  		return 0, err
   124  	}
   125  
   126  	// Construct readers and writers for the splice. This is used to
   127  	// provide a safer locking path for the WriteTo/ReadFrom operations
   128  	// (since they will otherwise go through public interface methods which
   129  	// conflict with locking done above), and simplifies the fallback path.
   130  	w := &lockedWriter{
   131  		Ctx:    ctx,
   132  		File:   dst,
   133  		Offset: opts.DstStart,
   134  	}
   135  	r := &lockedReader{
   136  		Ctx:    ctx,
   137  		File:   src,
   138  		Offset: opts.SrcStart,
   139  	}
   140  
   141  	// Attempt to do a WriteTo; this is likely the most efficient.
   142  	n, err := src.FileOperations.WriteTo(ctx, src, w, opts.Length, opts.Dup)
   143  	if n == 0 && linuxerr.Equals(linuxerr.ENOSYS, err) && !opts.Dup {
   144  		// Attempt as a ReadFrom. If a WriteTo, a ReadFrom may also be
   145  		// more efficient than a copy if buffers are cached or readily
   146  		// available. (It's unlikely that they can actually be donated).
   147  		n, err = dst.FileOperations.ReadFrom(ctx, dst, r, opts.Length)
   148  	}
   149  
   150  	// Support one last fallback option, but only if at least one of
   151  	// the source and destination are regular files. This is because
   152  	// if we block at some point, we could lose data. If the source is
   153  	// not a pipe then reading is not destructive; if the destination
   154  	// is a regular file, then it is guaranteed not to block writing.
   155  	if n == 0 && linuxerr.Equals(linuxerr.ENOSYS, err) && !opts.Dup && (!dstPipe || !srcPipe) {
   156  		// Fallback to an in-kernel copy.
   157  		n, err = io.Copy(w, &io.LimitedReader{
   158  			R: r,
   159  			N: opts.Length,
   160  		})
   161  	}
   162  
   163  	// Update offsets, if required.
   164  	if n > 0 {
   165  		if !dstPipe && !opts.DstOffset {
   166  			atomic.StoreInt64(&dst.offset, dst.offset+n)
   167  		}
   168  		if !srcPipe && !opts.SrcOffset {
   169  			atomic.StoreInt64(&src.offset, src.offset+n)
   170  		}
   171  	}
   172  
   173  	// Drop locks.
   174  	if dstLock {
   175  		dst.mu.Unlock()
   176  	}
   177  	if srcLock {
   178  		src.mu.Unlock()
   179  	}
   180  
   181  	return n, err
   182  }