github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/kernel/task_usermem.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package kernel
    16  
    17  import (
    18  	"math"
    19  
    20  	"github.com/metacubex/gvisor/pkg/abi/linux"
    21  	"github.com/metacubex/gvisor/pkg/context"
    22  	"github.com/metacubex/gvisor/pkg/errors/linuxerr"
    23  	"github.com/metacubex/gvisor/pkg/hostarch"
    24  	"github.com/metacubex/gvisor/pkg/marshal"
    25  	"github.com/metacubex/gvisor/pkg/sentry/mm"
    26  	"github.com/metacubex/gvisor/pkg/usermem"
    27  )
    28  
    29  const iovecLength = 16
    30  
    31  // MAX_RW_COUNT is the maximum size in bytes of a single read or write.
    32  // Reads and writes that exceed this size may be silently truncated.
    33  // (Linux: include/linux/fs.h:MAX_RW_COUNT)
    34  var MAX_RW_COUNT = int(hostarch.Addr(math.MaxInt32).RoundDown())
    35  
    36  // Activate ensures that the task has an active address space.
    37  func (t *Task) Activate() {
    38  	if mm := t.MemoryManager(); mm != nil {
    39  		if err := mm.Activate(t); err != nil {
    40  			panic("unable to activate mm: " + err.Error())
    41  		}
    42  	}
    43  }
    44  
    45  // Deactivate relinquishes the task's active address space.
    46  func (t *Task) Deactivate() {
    47  	if mm := t.MemoryManager(); mm != nil {
    48  		mm.Deactivate()
    49  	}
    50  }
    51  
    52  // CopyInBytes is a fast version of CopyIn if the caller can serialize the
    53  // data without reflection and pass in a byte slice.
    54  //
    55  // This Task's AddressSpace must be active.
    56  func (t *Task) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
    57  	return t.MemoryManager().CopyIn(t, addr, dst, usermem.IOOpts{
    58  		AddressSpaceActive: true,
    59  	})
    60  }
    61  
    62  // CopyOutBytes is a fast version of CopyOut if the caller can serialize the
    63  // data without reflection and pass in a byte slice.
    64  //
    65  // This Task's AddressSpace must be active.
    66  func (t *Task) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
    67  	return t.MemoryManager().CopyOut(t, addr, src, usermem.IOOpts{
    68  		AddressSpaceActive: true,
    69  	})
    70  }
    71  
    72  // CopyInString copies a NUL-terminated string of length at most maxlen in from
    73  // the task's memory. The copy will fail with syscall.EFAULT if it traverses
    74  // user memory that is unmapped or not readable by the user.
    75  //
    76  // This Task's AddressSpace must be active.
    77  func (t *Task) CopyInString(addr hostarch.Addr, maxlen int) (string, error) {
    78  	return usermem.CopyStringIn(t, t.MemoryManager(), addr, maxlen, usermem.IOOpts{
    79  		AddressSpaceActive: true,
    80  	})
    81  }
    82  
    83  // CopyInVector copies a NULL-terminated vector of strings from the task's
    84  // memory. The copy will fail with syscall.EFAULT if it traverses
    85  // user memory that is unmapped or not readable by the user.
    86  //
    87  // maxElemSize is the maximum size of each individual element.
    88  //
    89  // maxTotalSize is the maximum total length of all elements plus the total
    90  // number of elements. For example, the following strings correspond to
    91  // the following set of sizes:
    92  //
    93  //	{ "a", "b", "c" } => 6 (3 for lengths, 3 for elements)
    94  //	{ "abc" }         => 4 (3 for length, 1 for elements)
    95  //
    96  // This Task's AddressSpace must be active.
    97  func (t *Task) CopyInVector(addr hostarch.Addr, maxElemSize, maxTotalSize int) ([]string, error) {
    98  	var v []string
    99  	for {
   100  		argAddr := t.Arch().Native(0)
   101  		if _, err := argAddr.CopyIn(t, addr); err != nil {
   102  			return v, err
   103  		}
   104  		if t.Arch().Value(argAddr) == 0 {
   105  			break
   106  		}
   107  		// Each string has a zero terminating byte counted, so copying out a string
   108  		// requires at least one byte of space. Also, see the calculation below.
   109  		if maxTotalSize <= 0 {
   110  			return nil, linuxerr.ENOMEM
   111  		}
   112  		thisMax := maxElemSize
   113  		if maxTotalSize < thisMax {
   114  			thisMax = maxTotalSize
   115  		}
   116  		arg, err := t.CopyInString(hostarch.Addr(t.Arch().Value(argAddr)), thisMax)
   117  		if err != nil {
   118  			return v, err
   119  		}
   120  		v = append(v, arg)
   121  		addr += hostarch.Addr(t.Arch().Width())
   122  		maxTotalSize -= len(arg) + 1
   123  	}
   124  	return v, nil
   125  }
   126  
   127  // CopyOutIovecs converts src to an array of struct iovecs and copies it to the
   128  // memory mapped at addr for Task.
   129  //
   130  // Preconditions: Same as usermem.IO.CopyOut, plus:
   131  //   - The caller must be running on the task goroutine.
   132  //   - t's AddressSpace must be active.
   133  func (t *Task) CopyOutIovecs(addr hostarch.Addr, src hostarch.AddrRangeSeq) error {
   134  	switch t.Arch().Width() {
   135  	case 8:
   136  		if _, ok := addr.AddLength(uint64(src.NumRanges()) * iovecLength); !ok {
   137  			return linuxerr.EFAULT
   138  		}
   139  
   140  		b := t.CopyScratchBuffer(iovecLength)
   141  		for ; !src.IsEmpty(); src = src.Tail() {
   142  			ar := src.Head()
   143  			hostarch.ByteOrder.PutUint64(b[0:8], uint64(ar.Start))
   144  			hostarch.ByteOrder.PutUint64(b[8:16], uint64(ar.Length()))
   145  			if _, err := t.CopyOutBytes(addr, b); err != nil {
   146  				return err
   147  			}
   148  			addr += iovecLength
   149  		}
   150  
   151  	default:
   152  		return linuxerr.ENOSYS
   153  	}
   154  
   155  	return nil
   156  }
   157  
   158  // CopyInIovecs copies in IoVecs for Task.
   159  //
   160  // Preconditions: Same as usermem.IO.CopyIn, plus:
   161  // * The caller must be running on the task goroutine.
   162  // * t's AddressSpace must be active.
   163  func (t *Task) CopyInIovecs(addr hostarch.Addr, numIovecs int) (hostarch.AddrRangeSeq, error) {
   164  	// Special case to avoid allocating allocating a single hostaddr.AddrRange.
   165  	if numIovecs == 1 {
   166  		return copyInIovec(t, t, addr)
   167  	}
   168  	iovecs, err := copyInIovecs(t, t, addr, numIovecs)
   169  	if err != nil {
   170  		return hostarch.AddrRangeSeq{}, err
   171  	}
   172  	return hostarch.AddrRangeSeqFromSlice(iovecs), nil
   173  }
   174  
   175  // CopyInIovecsAsSlice copies in IoVecs and returns them in a slice.
   176  //
   177  // Preconditions: Same as usermem.IO.CopyIn, plus:
   178  //   - The caller must be running on the task goroutine or hold t.mu.
   179  //   - t's AddressSpace must be active.
   180  func (t *Task) CopyInIovecsAsSlice(addr hostarch.Addr, numIovecs int) ([]hostarch.AddrRange, error) {
   181  	return copyInIovecs(t, t, addr, numIovecs)
   182  }
   183  
   184  func copyInIovec(ctx marshal.CopyContext, t *Task, addr hostarch.Addr) (hostarch.AddrRangeSeq, error) {
   185  	if err := checkArch(t); err != nil {
   186  		return hostarch.AddrRangeSeq{}, err
   187  	}
   188  	b := ctx.CopyScratchBuffer(iovecLength)
   189  	ar, err := makeIovec(ctx, t, addr, b)
   190  	if err != nil {
   191  		return hostarch.AddrRangeSeq{}, err
   192  	}
   193  	return hostarch.AddrRangeSeqOf(ar).TakeFirst(MAX_RW_COUNT), nil
   194  }
   195  
   196  // copyInIovecs copies an array of numIovecs struct iovecs from the memory
   197  // mapped at addr, converts them to hostarch.AddrRanges, and returns them as a
   198  // hostarch.AddrRangeSeq.
   199  //
   200  // copyInIovecs shares the following properties with Linux's
   201  // lib/iov_iter.c:import_iovec() => fs/read_write.c:rw_copy_check_uvector():
   202  //
   203  // - If the length of any AddrRange would exceed the range of an ssize_t,
   204  // copyInIovecs returns EINVAL.
   205  //
   206  // - If the length of any AddrRange would cause its end to overflow,
   207  // copyInIovecs returns EFAULT.
   208  //
   209  // - If any AddrRange would include addresses outside the application address
   210  // range, copyInIovecs returns EFAULT.
   211  //
   212  //   - The combined length of all AddrRanges is limited to MAX_RW_COUNT. If the
   213  //     combined length of all AddrRanges would otherwise exceed this amount, ranges
   214  //     beyond MAX_RW_COUNT are silently truncated.
   215  func copyInIovecs(ctx marshal.CopyContext, t *Task, addr hostarch.Addr, numIovecs int) ([]hostarch.AddrRange, error) {
   216  	if err := checkArch(t); err != nil {
   217  		return nil, err
   218  	}
   219  	if numIovecs == 0 {
   220  		return nil, nil
   221  	}
   222  
   223  	var dst []hostarch.AddrRange
   224  	if numIovecs > 1 {
   225  		dst = make([]hostarch.AddrRange, 0, numIovecs)
   226  	}
   227  
   228  	if _, ok := addr.AddLength(uint64(numIovecs) * iovecLength); !ok {
   229  		return nil, linuxerr.EFAULT
   230  	}
   231  
   232  	b := ctx.CopyScratchBuffer(iovecLength)
   233  	for i := 0; i < numIovecs; i++ {
   234  		ar, err := makeIovec(ctx, t, addr, b)
   235  		if err != nil {
   236  			return []hostarch.AddrRange{}, err
   237  		}
   238  		dst = append(dst, ar)
   239  
   240  		addr += iovecLength
   241  	}
   242  	// Truncate to MAX_RW_COUNT.
   243  	var total uint64
   244  	for i := range dst {
   245  		dstlen := uint64(dst[i].Length())
   246  		if rem := uint64(MAX_RW_COUNT) - total; rem < dstlen {
   247  			dst[i].End -= hostarch.Addr(dstlen - rem)
   248  			dstlen = rem
   249  		}
   250  		total += dstlen
   251  	}
   252  
   253  	return dst, nil
   254  }
   255  
   256  func checkArch(t *Task) error {
   257  	if t.Arch().Width() != 8 {
   258  		return linuxerr.ENOSYS
   259  	}
   260  	return nil
   261  }
   262  
   263  func makeIovec(ctx marshal.CopyContext, t *Task, addr hostarch.Addr, b []byte) (hostarch.AddrRange, error) {
   264  	if _, err := ctx.CopyInBytes(addr, b); err != nil {
   265  		return hostarch.AddrRange{}, err
   266  	}
   267  
   268  	base := hostarch.Addr(hostarch.ByteOrder.Uint64(b[0:8]))
   269  	length := hostarch.ByteOrder.Uint64(b[8:16])
   270  	if length > math.MaxInt64 {
   271  		return hostarch.AddrRange{}, linuxerr.EINVAL
   272  	}
   273  	ar, ok := t.MemoryManager().CheckIORange(base, int64(length))
   274  	if !ok {
   275  		return hostarch.AddrRange{}, linuxerr.EFAULT
   276  	}
   277  	return ar, nil
   278  }
   279  
   280  // SingleIOSequence returns a usermem.IOSequence representing [addr,
   281  // addr+length) in t's address space. If this contains addresses outside the
   282  // application address range, it returns EFAULT. If length exceeds
   283  // MAX_RW_COUNT, the range is silently truncated.
   284  //
   285  // SingleIOSequence is analogous to Linux's
   286  // lib/iov_iter.c:import_single_range(). (Note that the non-vectorized read and
   287  // write syscalls in Linux do not use import_single_range(). However they check
   288  // access_ok() in fs/read_write.c:vfs_read/vfs_write, and overflowing address
   289  // ranges are truncated to MAX_RW_COUNT by fs/read_write.c:rw_verify_area().)
   290  func (t *Task) SingleIOSequence(addr hostarch.Addr, length int, opts usermem.IOOpts) (usermem.IOSequence, error) {
   291  	if length > MAX_RW_COUNT {
   292  		length = MAX_RW_COUNT
   293  	}
   294  	ar, ok := t.MemoryManager().CheckIORange(addr, int64(length))
   295  	if !ok {
   296  		return usermem.IOSequence{}, linuxerr.EFAULT
   297  	}
   298  	return usermem.IOSequence{
   299  		IO:    t.MemoryManager(),
   300  		Addrs: hostarch.AddrRangeSeqOf(ar),
   301  		Opts:  opts,
   302  	}, nil
   303  }
   304  
   305  // IovecsIOSequence returns a usermem.IOSequence representing the array of
   306  // iovcnt struct iovecs at addr in t's address space. opts applies to the
   307  // returned IOSequence, not the reading of the struct iovec array.
   308  //
   309  // IovecsIOSequence is analogous to Linux's lib/iov_iter.c:import_iovec().
   310  //
   311  // Preconditions: Same as Task.CopyInIovecs.
   312  func (t *Task) IovecsIOSequence(addr hostarch.Addr, iovcnt int, opts usermem.IOOpts) (usermem.IOSequence, error) {
   313  	if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
   314  		return usermem.IOSequence{}, linuxerr.EINVAL
   315  	}
   316  	ars, err := t.CopyInIovecs(addr, iovcnt)
   317  	if err != nil {
   318  		return usermem.IOSequence{}, err
   319  	}
   320  	return usermem.IOSequence{
   321  		IO:    t.MemoryManager(),
   322  		Addrs: ars,
   323  		Opts:  opts,
   324  	}, nil
   325  }
   326  
   327  type taskCopyContext struct {
   328  	ctx  context.Context
   329  	t    *Task
   330  	opts usermem.IOOpts
   331  }
   332  
   333  // CopyContext returns a marshal.CopyContext that copies to/from t's address
   334  // space using opts.
   335  func (t *Task) CopyContext(ctx context.Context, opts usermem.IOOpts) *taskCopyContext {
   336  	return &taskCopyContext{
   337  		ctx:  ctx,
   338  		t:    t,
   339  		opts: opts,
   340  	}
   341  }
   342  
   343  // CopyScratchBuffer implements marshal.CopyContext.CopyScratchBuffer.
   344  func (cc *taskCopyContext) CopyScratchBuffer(size int) []byte {
   345  	if ctxTask, ok := cc.ctx.(*Task); ok {
   346  		return ctxTask.CopyScratchBuffer(size)
   347  	}
   348  	return make([]byte, size)
   349  }
   350  
   351  func (cc *taskCopyContext) getMemoryManager() (*mm.MemoryManager, error) {
   352  	tmm := cc.t.MemoryManager()
   353  	if tmm == nil {
   354  		return nil, linuxerr.ESRCH
   355  	}
   356  	if !tmm.IncUsers() {
   357  		return nil, linuxerr.EFAULT
   358  	}
   359  	return tmm, nil
   360  }
   361  
   362  // CopyInBytes implements marshal.CopyContext.CopyInBytes.
   363  //
   364  // Preconditions: Same as usermem.IO.CopyIn, plus:
   365  //   - The caller must be running on the task goroutine or hold the cc.t.mu
   366  //   - t's AddressSpace must be active.
   367  func (cc *taskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
   368  	tmm, err := cc.getMemoryManager()
   369  	if err != nil {
   370  		return 0, err
   371  	}
   372  	defer tmm.DecUsers(cc.ctx)
   373  	return tmm.CopyIn(cc.ctx, addr, dst, cc.opts)
   374  }
   375  
   376  // CopyOutBytes implements marshal.CopyContext.CopyOutBytes.
   377  //
   378  // Preconditions: Same as usermem.IO.CopyOut, plus:
   379  //   - The caller must be running on the task goroutine or hold the cc.t.mu
   380  //   - t's AddressSpace must be active.
   381  func (cc *taskCopyContext) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
   382  	tmm, err := cc.getMemoryManager()
   383  	if err != nil {
   384  		return 0, err
   385  	}
   386  	defer tmm.DecUsers(cc.ctx)
   387  	return tmm.CopyOut(cc.ctx, addr, src, cc.opts)
   388  }
   389  
   390  type ownTaskCopyContext struct {
   391  	t    *Task
   392  	opts usermem.IOOpts
   393  }
   394  
   395  // OwnCopyContext returns a marshal.CopyContext that copies to/from t's address
   396  // space using opts. The returned CopyContext may only be used by t's task
   397  // goroutine.
   398  //
   399  // Since t already implements marshal.CopyContext, this is only needed to
   400  // override the usermem.IOOpts used for the copy.
   401  func (t *Task) OwnCopyContext(opts usermem.IOOpts) *ownTaskCopyContext {
   402  	return &ownTaskCopyContext{
   403  		t:    t,
   404  		opts: opts,
   405  	}
   406  }
   407  
   408  // CopyScratchBuffer implements marshal.CopyContext.CopyScratchBuffer.
   409  func (cc *ownTaskCopyContext) CopyScratchBuffer(size int) []byte {
   410  	return cc.t.CopyScratchBuffer(size)
   411  }
   412  
   413  // CopyInBytes implements marshal.CopyContext.CopyInBytes.
   414  func (cc *ownTaskCopyContext) CopyInBytes(addr hostarch.Addr, dst []byte) (int, error) {
   415  	return cc.t.MemoryManager().CopyIn(cc.t, addr, dst, cc.opts)
   416  }
   417  
   418  // CopyOutBytes implements marshal.CopyContext.CopyOutBytes.
   419  func (cc *ownTaskCopyContext) CopyOutBytes(addr hostarch.Addr, src []byte) (int, error) {
   420  	return cc.t.MemoryManager().CopyOut(cc.t, addr, src, cc.opts)
   421  }