github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/fs/splice.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fs

import (
	"io"
	"sync/atomic"

	"github.com/SagerNet/gvisor/pkg/context"
	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
	"github.com/SagerNet/gvisor/pkg/syserror"
)

// Splice moves data to this file, directly from another.
//
// Offsets are updated only if DstOffset and SrcOffset are set.
func Splice(ctx context.Context, dst *File, src *File, opts SpliceOpts) (int64, error) {
	// Verify basic file flag permissions.
	if !dst.Flags().Write || !src.Flags().Read {
		return 0, linuxerr.EBADF
	}

	// Check whether or not the objects being spliced are stream-oriented
	// (i.e. pipes or sockets). For all stream-oriented files and files
	// where a specific offset is not requested, we acquire the file mutex.
	// This has two important side effects. First, it provides the standard
	// protection against concurrent writes that would mutate the offset.
	// Second, it prevents Splice deadlocks. Only internal anonymous files
	// implement the ReadFrom and WriteTo methods directly, and since such
	// anonymous files are referred to by a unique fs.File object, we know
	// that the file mutex takes strict precedence over internal locks.
	// Since we enforce lock ordering here, we can't deadlock by using
	// a file in two different splice operations simultaneously.
	srcPipe := !IsRegular(src.Dirent.Inode.StableAttr)
	dstPipe := !IsRegular(dst.Dirent.Inode.StableAttr)
	dstAppend := !dstPipe && dst.Flags().Append
	srcLock := srcPipe || !opts.SrcOffset
	dstLock := dstPipe || !opts.DstOffset || dstAppend

	switch {
	case srcLock && dstLock:
		switch {
		case dst.UniqueID < src.UniqueID:
			// Acquire dst first.
			if !dst.mu.Lock(ctx) {
				return 0, syserror.ErrInterrupted
			}
			if !src.mu.Lock(ctx) {
				dst.mu.Unlock()
				return 0, syserror.ErrInterrupted
			}
		case dst.UniqueID > src.UniqueID:
			// Acquire src first.
			if !src.mu.Lock(ctx) {
				return 0, syserror.ErrInterrupted
			}
			if !dst.mu.Lock(ctx) {
				src.mu.Unlock()
				return 0, syserror.ErrInterrupted
			}
		case dst.UniqueID == src.UniqueID:
			// Acquire only one lock; it's the same file. This is a
			// bit of an edge case, but presumably it's possible.
			if !dst.mu.Lock(ctx) {
				return 0, syserror.ErrInterrupted
			}
			srcLock = false // Only need one unlock.
		}
		// Use both offsets (locked).
		opts.DstStart = dst.offset
		opts.SrcStart = src.offset
	case dstLock:
		// Acquire only dst.
		if !dst.mu.Lock(ctx) {
			return 0, syserror.ErrInterrupted
		}
		opts.DstStart = dst.offset // Safe: locked.
	case srcLock:
		// Acquire only src.
		if !src.mu.Lock(ctx) {
			return 0, syserror.ErrInterrupted
		}
		opts.SrcStart = src.offset // Safe: locked.
	}

	var err error
	if dstAppend {
		unlock := dst.Dirent.Inode.lockAppendMu(dst.Flags().Append)
		defer unlock()

		// Figure out the appropriate offset to use.
		err = dst.offsetForAppend(ctx, &opts.DstStart)
	}
	if err == nil && !dstPipe {
		// Enforce file limits.
		limit, ok := dst.checkLimit(ctx, opts.DstStart)
		switch {
		case ok && limit == 0:
			err = syserror.ErrExceedsFileSizeLimit
		case ok && limit < opts.Length:
			opts.Length = limit // Cap the write.
		}
	}
	if err != nil {
		if dstLock {
			dst.mu.Unlock()
		}
		if srcLock {
			src.mu.Unlock()
		}
		return 0, err
	}

	// Construct readers and writers for the splice. This is used to
	// provide a safer locking path for the WriteTo/ReadFrom operations
	// (since they will otherwise go through public interface methods which
	// conflict with locking done above), and simplifies the fallback path.
	w := &lockedWriter{
		Ctx:    ctx,
		File:   dst,
		Offset: opts.DstStart,
	}
	r := &lockedReader{
		Ctx:    ctx,
		File:   src,
		Offset: opts.SrcStart,
	}

	// Attempt to do a WriteTo; this is likely the most efficient.
	n, err := src.FileOperations.WriteTo(ctx, src, w, opts.Length, opts.Dup)
	if n == 0 && linuxerr.Equals(linuxerr.ENOSYS, err) && !opts.Dup {
		// Attempt as a ReadFrom. Like a WriteTo, a ReadFrom may also be
		// more efficient than a copy if buffers are cached or readily
		// available. (It's unlikely that they can actually be donated).
		n, err = dst.FileOperations.ReadFrom(ctx, dst, r, opts.Length)
	}

	// Support one last fallback option, but only if at least one of
	// the source and destination are regular files. This is because
	// if we block at some point, we could lose data. If the source is
	// not a pipe then reading is not destructive; if the destination
	// is a regular file, then it is guaranteed not to block writing.
	if n == 0 && linuxerr.Equals(linuxerr.ENOSYS, err) && !opts.Dup && (!dstPipe || !srcPipe) {
		// Fall back to an in-kernel copy.
		n, err = io.Copy(w, &io.LimitedReader{
			R: r,
			N: opts.Length,
		})
	}

	// Update offsets, if required.
	if n > 0 {
		if !dstPipe && !opts.DstOffset {
			atomic.StoreInt64(&dst.offset, dst.offset+n)
		}
		if !srcPipe && !opts.SrcOffset {
			atomic.StoreInt64(&src.offset, src.offset+n)
		}
	}

	// Drop locks.
	if dstLock {
		dst.mu.Unlock()
	}
	if srcLock {
		src.mu.Unlock()
	}

	return n, err
}
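
// A minimal usage sketch, kept as a comment so the file itself is unchanged:
// a sendfile-style caller might drive Splice in a loop until the requested
// length is transferred or no further progress is made. The spliceAll helper
// below is hypothetical (its name, loop structure, and error handling are
// assumptions); only Splice and SpliceOpts are defined in this package.
// Leaving SrcOffset and DstOffset false means the files' internal offsets
// are used and advanced under their mutexes, as in the locked paths above.
//
//	func spliceAll(ctx context.Context, dst, src *File, length int64) (int64, error) {
//		var total int64
//		for total < length {
//			n, err := Splice(ctx, dst, src, SpliceOpts{
//				Length: length - total,
//			})
//			total += n
//			if err != nil {
//				return total, err
//			}
//			if n == 0 {
//				break // EOF or no forward progress.
//			}
//		}
//		return total, nil
//	}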