github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/syscalls/linux/sys_process_vm.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"fmt"
    19  
    20  	"github.com/metacubex/gvisor/pkg/abi/linux"
    21  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    22  	"github.com/metacubex/gvisor/pkg/hostarch"
    23  	"github.com/metacubex/gvisor/pkg/marshal"
    24  	"github.com/metacubex/gvisor/pkg/sentry/arch"
    25  	"github.com/metacubex/gvisor/pkg/sentry/kernel"
    26  	"github.com/metacubex/gvisor/pkg/usermem"
    27  )
    28  
    29  type processVMOpType int
    30  
    31  const (
    32  	processVMOpRead = iota
    33  	processVMOpWrite
    34  )
    35  
    36  // ProcessVMReadv implements process_vm_readv(2).
    37  func ProcessVMReadv(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    38  	return processVMOp(t, args, processVMOpRead)
    39  }
    40  
    41  // ProcessVMWritev implements process_vm_writev(2).
    42  func ProcessVMWritev(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    43  	return processVMOp(t, args, processVMOpWrite)
    44  }
    45  
    46  func processVMOp(t *kernel.Task, args arch.SyscallArguments, op processVMOpType) (uintptr, *kernel.SyscallControl, error) {
    47  	pid := kernel.ThreadID(args[0].Int())
    48  	lvec := hostarch.Addr(args[1].Pointer())
    49  	liovcnt := int(args[2].Int64())
    50  	rvec := hostarch.Addr(args[3].Pointer())
    51  	riovcnt := int(args[4].Int64())
    52  	flags := args[5].Int()
    53  
    54  	// Parse the flags.
    55  	switch {
    56  	case flags != 0 ||
    57  		liovcnt < 0 ||
    58  		riovcnt < 0 ||
    59  		liovcnt > linux.UIO_MAXIOV ||
    60  		riovcnt > linux.UIO_MAXIOV:
    61  		return 0, nil, linuxerr.EINVAL
    62  	case liovcnt == 0 || riovcnt == 0:
    63  		return 0, nil, nil
    64  	case lvec == 0 || rvec == 0:
    65  		return 0, nil, linuxerr.EFAULT
    66  	}
    67  
    68  	// Local process is always the current task (t). Remote process is the
    69  	// pid specified in the syscall arguments. It is allowed to be the same
    70  	// as the caller process.
    71  	remoteTask := t.PIDNamespace().TaskWithID(pid)
    72  	if remoteTask == nil {
    73  		return 0, nil, linuxerr.ESRCH
    74  	}
    75  
    76  	// man 2 process_vm_read: "Permission to read from or write to another
    77  	// process is governed by a ptrace access mode
    78  	// PTRACE_MODE_ATTACH_REALCREDS check; see ptrace(2)."
    79  	if !t.CanTrace(remoteTask, true /* attach */) {
    80  		return 0, nil, linuxerr.EPERM
    81  	}
    82  
    83  	// Figure out which processes and arguments (local or remote) are for
    84  	// writing and which are for reading, based on the operation.
    85  	var opArgs processVMOpArgs
    86  	switch op {
    87  	case processVMOpRead:
    88  		// Read from remote process and write into local.
    89  		opArgs = processVMOpArgs{
    90  			readCtx:         remoteTask.CopyContext(t, usermem.IOOpts{}),
    91  			readAddr:        rvec,
    92  			readIovecCount:  riovcnt,
    93  			writeCtx:        t.CopyContext(t, usermem.IOOpts{AddressSpaceActive: true}),
    94  			writeAddr:       lvec,
    95  			writeIovecCount: liovcnt,
    96  		}
    97  	case processVMOpWrite:
    98  		// Read from local process and write into remote.
    99  		opArgs = processVMOpArgs{
   100  			readCtx:         t.CopyContext(t, usermem.IOOpts{AddressSpaceActive: true}),
   101  			readAddr:        lvec,
   102  			readIovecCount:  liovcnt,
   103  			writeCtx:        remoteTask.CopyContext(t, usermem.IOOpts{}),
   104  			writeAddr:       rvec,
   105  			writeIovecCount: riovcnt,
   106  		}
   107  	default:
   108  		panic(fmt.Sprintf("unknown process vm op type: %v", op))
   109  	}
   110  
   111  	var (
   112  		n   int
   113  		err error
   114  	)
   115  	if t == remoteTask {
   116  		// No need to lock remote process's task mutex since it is the
   117  		// same as this process.
   118  		n, err = doProcessVMOpMaybeLocked(t, opArgs)
   119  	} else {
   120  		// Need to take remote process's task mutex to pin
   121  		// remoteTask.MemoryManager().
   122  		remoteTask.WithMuLocked(func(*kernel.Task) {
   123  			if remoteTask.MemoryManager() == nil {
   124  				err = linuxerr.ESRCH
   125  				return
   126  			}
   127  			n, err = doProcessVMOpMaybeLocked(t, opArgs)
   128  		})
   129  	}
   130  	if n == 0 && err != nil {
   131  		return 0, nil, err
   132  	}
   133  	return uintptr(n), nil, nil
   134  }
   135  
// processVMOpArgs describes a single process_vm_* transfer in terms of its
// source ("read") and destination ("write") sides, independent of which side
// is the calling task and which is the remote task.
//
// readCtx and writeCtx are the copy contexts used to copy data bytes in from
// the source and out to the destination, respectively. readAddr/writeAddr and
// readIovecCount/writeIovecCount give the address and element count of the
// iovec array for each side; doProcessVMOpMaybeLocked copies both arrays in
// via the calling task.
type processVMOpArgs struct {
	readCtx         marshal.CopyContext
	readAddr        hostarch.Addr
	readIovecCount  int
	writeCtx        marshal.CopyContext
	writeAddr       hostarch.Addr
	writeIovecCount int
}
   144  
// maxScratchBufferSize is the maximum size of the scratch buffer used to
// stage data in doProcessVMOpMaybeLocked. It should be large enough to
// minimize the number of trips through MM.
const maxScratchBufferSize = 1 << 20
   148  
   149  func doProcessVMOpMaybeLocked(t *kernel.Task, args processVMOpArgs) (int, error) {
   150  	// Copy IOVecs in to kernel.
   151  	readIovecs, err := t.CopyInIovecsAsSlice(args.readAddr, args.readIovecCount)
   152  	if err != nil {
   153  		return 0, err
   154  	}
   155  	writeIovecs, err := t.CopyInIovecsAsSlice(args.writeAddr, args.writeIovecCount)
   156  	if err != nil {
   157  		return 0, err
   158  	}
   159  
   160  	// Get scratch buffer from the calling task.
   161  	// Size should be max be size of largest read iovec.
   162  	var bufSize int
   163  	for _, readIovec := range readIovecs {
   164  		if int(readIovec.Length()) > bufSize {
   165  			bufSize = int(readIovec.Length())
   166  		}
   167  	}
   168  	if bufSize > maxScratchBufferSize {
   169  		bufSize = maxScratchBufferSize
   170  	}
   171  	buf := t.CopyScratchBuffer(bufSize)
   172  
   173  	// Number of bytes written.
   174  	var n int
   175  	for len(readIovecs) != 0 && len(writeIovecs) != 0 {
   176  		readIovec := readIovecs[0]
   177  		length := readIovec.Length()
   178  		if length == 0 {
   179  			readIovecs = readIovecs[1:]
   180  			continue
   181  		}
   182  		if length > maxScratchBufferSize {
   183  			length = maxScratchBufferSize
   184  		}
   185  		buf = buf[0:int(length)]
   186  		bytes, err := args.readCtx.CopyInBytes(readIovec.Start, buf)
   187  		if bytes == 0 {
   188  			return n, err
   189  		}
   190  		readIovecs[0].Start += hostarch.Addr(bytes)
   191  
   192  		start := 0
   193  		for bytes > start && len(writeIovecs) > 0 {
   194  			writeLength := int(writeIovecs[0].Length())
   195  			if writeLength == 0 {
   196  				writeIovecs = writeIovecs[1:]
   197  				continue
   198  			}
   199  			if writeLength > (bytes - start) {
   200  				writeLength = bytes - start
   201  			}
   202  			out, err := args.writeCtx.CopyOutBytes(writeIovecs[0].Start, buf[start:writeLength+start])
   203  			n += out
   204  			start += out
   205  			if out != writeLength {
   206  				return n, err
   207  			}
   208  			writeIovecs[0].Start += hostarch.Addr(out)
   209  		}
   210  	}
   211  	return n, nil
   212  }