github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/syscalls/linux/sys_process_vm.go (about) 1 // Copyright 2020 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package linux 16 17 import ( 18 "fmt" 19 20 "github.com/metacubex/gvisor/pkg/abi/linux" 21 "github.com/metacubex/gvisor/pkg/errors/linuxerr" 22 "github.com/metacubex/gvisor/pkg/hostarch" 23 "github.com/metacubex/gvisor/pkg/marshal" 24 "github.com/metacubex/gvisor/pkg/sentry/arch" 25 "github.com/metacubex/gvisor/pkg/sentry/kernel" 26 "github.com/metacubex/gvisor/pkg/usermem" 27 ) 28 29 type processVMOpType int 30 31 const ( 32 processVMOpRead = iota 33 processVMOpWrite 34 ) 35 36 // ProcessVMReadv implements process_vm_readv(2). 37 func ProcessVMReadv(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 38 return processVMOp(t, args, processVMOpRead) 39 } 40 41 // ProcessVMWritev implements process_vm_writev(2). 42 func ProcessVMWritev(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { 43 return processVMOp(t, args, processVMOpWrite) 44 } 45 46 func processVMOp(t *kernel.Task, args arch.SyscallArguments, op processVMOpType) (uintptr, *kernel.SyscallControl, error) { 47 pid := kernel.ThreadID(args[0].Int()) 48 lvec := hostarch.Addr(args[1].Pointer()) 49 liovcnt := int(args[2].Int64()) 50 rvec := hostarch.Addr(args[3].Pointer()) 51 riovcnt := int(args[4].Int64()) 52 flags := args[5].Int() 53 54 // Parse the flags. 55 switch { 56 case flags != 0 || 57 liovcnt < 0 || 58 riovcnt < 0 || 59 liovcnt > linux.UIO_MAXIOV || 60 riovcnt > linux.UIO_MAXIOV: 61 return 0, nil, linuxerr.EINVAL 62 case liovcnt == 0 || riovcnt == 0: 63 return 0, nil, nil 64 case lvec == 0 || rvec == 0: 65 return 0, nil, linuxerr.EFAULT 66 } 67 68 // Local process is always the current task (t). Remote process is the 69 // pid specified in the syscall arguments. It is allowed to be the same 70 // as the caller process. 71 remoteTask := t.PIDNamespace().TaskWithID(pid) 72 if remoteTask == nil { 73 return 0, nil, linuxerr.ESRCH 74 } 75 76 // man 2 process_vm_read: "Permission to read from or write to another 77 // process is governed by a ptrace access mode 78 // PTRACE_MODE_ATTACH_REALCREDS check; see ptrace(2)." 79 if !t.CanTrace(remoteTask, true /* attach */) { 80 return 0, nil, linuxerr.EPERM 81 } 82 83 // Figure out which processes and arguments (local or remote) are for 84 // writing and which are for reading, based on the operation. 85 var opArgs processVMOpArgs 86 switch op { 87 case processVMOpRead: 88 // Read from remote process and write into local. 89 opArgs = processVMOpArgs{ 90 readCtx: remoteTask.CopyContext(t, usermem.IOOpts{}), 91 readAddr: rvec, 92 readIovecCount: riovcnt, 93 writeCtx: t.CopyContext(t, usermem.IOOpts{AddressSpaceActive: true}), 94 writeAddr: lvec, 95 writeIovecCount: liovcnt, 96 } 97 case processVMOpWrite: 98 // Read from local process and write into remote. 99 opArgs = processVMOpArgs{ 100 readCtx: t.CopyContext(t, usermem.IOOpts{AddressSpaceActive: true}), 101 readAddr: lvec, 102 readIovecCount: liovcnt, 103 writeCtx: remoteTask.CopyContext(t, usermem.IOOpts{}), 104 writeAddr: rvec, 105 writeIovecCount: riovcnt, 106 } 107 default: 108 panic(fmt.Sprintf("unknown process vm op type: %v", op)) 109 } 110 111 var ( 112 n int 113 err error 114 ) 115 if t == remoteTask { 116 // No need to lock remote process's task mutex since it is the 117 // same as this process. 118 n, err = doProcessVMOpMaybeLocked(t, opArgs) 119 } else { 120 // Need to take remote process's task mutex to pin 121 // remoteTask.MemoryManager(). 122 remoteTask.WithMuLocked(func(*kernel.Task) { 123 if remoteTask.MemoryManager() == nil { 124 err = linuxerr.ESRCH 125 return 126 } 127 n, err = doProcessVMOpMaybeLocked(t, opArgs) 128 }) 129 } 130 if n == 0 && err != nil { 131 return 0, nil, err 132 } 133 return uintptr(n), nil, nil 134 } 135 136 type processVMOpArgs struct { 137 readCtx marshal.CopyContext 138 readAddr hostarch.Addr 139 readIovecCount int 140 writeCtx marshal.CopyContext 141 writeAddr hostarch.Addr 142 writeIovecCount int 143 } 144 145 // maxScratchBufferSize is the maximum size of a scratch buffer. It should be 146 // sufficiently large to minimizing the number of trips through MM. 147 const maxScratchBufferSize = 1 << 20 148 149 func doProcessVMOpMaybeLocked(t *kernel.Task, args processVMOpArgs) (int, error) { 150 // Copy IOVecs in to kernel. 151 readIovecs, err := t.CopyInIovecsAsSlice(args.readAddr, args.readIovecCount) 152 if err != nil { 153 return 0, err 154 } 155 writeIovecs, err := t.CopyInIovecsAsSlice(args.writeAddr, args.writeIovecCount) 156 if err != nil { 157 return 0, err 158 } 159 160 // Get scratch buffer from the calling task. 161 // Size should be max be size of largest read iovec. 162 var bufSize int 163 for _, readIovec := range readIovecs { 164 if int(readIovec.Length()) > bufSize { 165 bufSize = int(readIovec.Length()) 166 } 167 } 168 if bufSize > maxScratchBufferSize { 169 bufSize = maxScratchBufferSize 170 } 171 buf := t.CopyScratchBuffer(bufSize) 172 173 // Number of bytes written. 174 var n int 175 for len(readIovecs) != 0 && len(writeIovecs) != 0 { 176 readIovec := readIovecs[0] 177 length := readIovec.Length() 178 if length == 0 { 179 readIovecs = readIovecs[1:] 180 continue 181 } 182 if length > maxScratchBufferSize { 183 length = maxScratchBufferSize 184 } 185 buf = buf[0:int(length)] 186 bytes, err := args.readCtx.CopyInBytes(readIovec.Start, buf) 187 if bytes == 0 { 188 return n, err 189 } 190 readIovecs[0].Start += hostarch.Addr(bytes) 191 192 start := 0 193 for bytes > start && len(writeIovecs) > 0 { 194 writeLength := int(writeIovecs[0].Length()) 195 if writeLength == 0 { 196 writeIovecs = writeIovecs[1:] 197 continue 198 } 199 if writeLength > (bytes - start) { 200 writeLength = bytes - start 201 } 202 out, err := args.writeCtx.CopyOutBytes(writeIovecs[0].Start, buf[start:writeLength+start]) 203 n += out 204 start += out 205 if out != writeLength { 206 return n, err 207 } 208 writeIovecs[0].Start += hostarch.Addr(out) 209 } 210 } 211 return n, nil 212 }