github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/syscalls/linux/sys_aio.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package linux
    16  
    17  import (
    18  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    19  	"github.com/SagerNet/gvisor/pkg/context"
    20  	"github.com/SagerNet/gvisor/pkg/errors/linuxerr"
    21  	"github.com/SagerNet/gvisor/pkg/hostarch"
    22  	"github.com/SagerNet/gvisor/pkg/marshal/primitive"
    23  	"github.com/SagerNet/gvisor/pkg/sentry/arch"
    24  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    25  	"github.com/SagerNet/gvisor/pkg/sentry/kernel"
    26  	"github.com/SagerNet/gvisor/pkg/sentry/kernel/eventfd"
    27  	ktime "github.com/SagerNet/gvisor/pkg/sentry/kernel/time"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/mm"
    29  	"github.com/SagerNet/gvisor/pkg/syserror"
    30  	"github.com/SagerNet/gvisor/pkg/usermem"
    31  )
    32  
    33  // IoSetup implements linux syscall io_setup(2).
    34  func IoSetup(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    35  	nrEvents := args[0].Int()
    36  	idAddr := args[1].Pointer()
    37  
    38  	// Linux uses the native long as the aio ID.
    39  	//
    40  	// The context pointer _must_ be zero initially.
    41  	var idIn uint64
    42  	if _, err := primitive.CopyUint64In(t, idAddr, &idIn); err != nil {
    43  		return 0, nil, err
    44  	}
    45  	if idIn != 0 {
    46  		return 0, nil, linuxerr.EINVAL
    47  	}
    48  
    49  	id, err := t.MemoryManager().NewAIOContext(t, uint32(nrEvents))
    50  	if err != nil {
    51  		return 0, nil, err
    52  	}
    53  
    54  	// Copy out the new ID.
    55  	if _, err := primitive.CopyUint64Out(t, idAddr, id); err != nil {
    56  		t.MemoryManager().DestroyAIOContext(t, id)
    57  		return 0, nil, err
    58  	}
    59  
    60  	return 0, nil, nil
    61  }
    62  
    63  // IoDestroy implements linux syscall io_destroy(2).
    64  func IoDestroy(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    65  	id := args[0].Uint64()
    66  
    67  	ctx := t.MemoryManager().DestroyAIOContext(t, id)
    68  	if ctx == nil {
    69  		// Does not exist.
    70  		return 0, nil, linuxerr.EINVAL
    71  	}
    72  
    73  	// Drain completed requests amd wait for pending requests until there are no
    74  	// more.
    75  	for {
    76  		ctx.Drain()
    77  
    78  		ch := ctx.WaitChannel()
    79  		if ch == nil {
    80  			// No more requests, we're done.
    81  			return 0, nil, nil
    82  		}
    83  		// The task cannot be interrupted during the wait. Equivalent to
    84  		// TASK_UNINTERRUPTIBLE in Linux.
    85  		t.UninterruptibleSleepStart(true /* deactivate */)
    86  		<-ch
    87  		t.UninterruptibleSleepFinish(true /* activate */)
    88  	}
    89  }
    90  
    91  // IoGetevents implements linux syscall io_getevents(2).
    92  func IoGetevents(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
    93  	id := args[0].Uint64()
    94  	minEvents := args[1].Int()
    95  	events := args[2].Int()
    96  	eventsAddr := args[3].Pointer()
    97  	timespecAddr := args[4].Pointer()
    98  
    99  	// Sanity check arguments.
   100  	if minEvents < 0 || minEvents > events {
   101  		return 0, nil, linuxerr.EINVAL
   102  	}
   103  
   104  	ctx, ok := t.MemoryManager().LookupAIOContext(t, id)
   105  	if !ok {
   106  		return 0, nil, linuxerr.EINVAL
   107  	}
   108  
   109  	// Setup the timeout.
   110  	var haveDeadline bool
   111  	var deadline ktime.Time
   112  	if timespecAddr != 0 {
   113  		d, err := copyTimespecIn(t, timespecAddr)
   114  		if err != nil {
   115  			return 0, nil, err
   116  		}
   117  		if !d.Valid() {
   118  			return 0, nil, linuxerr.EINVAL
   119  		}
   120  		deadline = t.Kernel().MonotonicClock().Now().Add(d.ToDuration())
   121  		haveDeadline = true
   122  	}
   123  
   124  	// Loop over all requests.
   125  	for count := int32(0); count < events; count++ {
   126  		// Get a request, per semantics.
   127  		var v interface{}
   128  		if count >= minEvents {
   129  			var ok bool
   130  			v, ok = ctx.PopRequest()
   131  			if !ok {
   132  				return uintptr(count), nil, nil
   133  			}
   134  		} else {
   135  			var err error
   136  			v, err = waitForRequest(ctx, t, haveDeadline, deadline)
   137  			if err != nil {
   138  				if count > 0 || linuxerr.Equals(linuxerr.ETIMEDOUT, err) {
   139  					return uintptr(count), nil, nil
   140  				}
   141  				return 0, nil, syserror.ConvertIntr(err, syserror.EINTR)
   142  			}
   143  		}
   144  
   145  		ev := v.(*linux.IOEvent)
   146  
   147  		// Copy out the result.
   148  		if _, err := ev.CopyOut(t, eventsAddr); err != nil {
   149  			if count > 0 {
   150  				return uintptr(count), nil, nil
   151  			}
   152  			// Nothing done.
   153  			return 0, nil, err
   154  		}
   155  
   156  		// Keep rolling.
   157  		eventsAddr += hostarch.Addr(linux.IOEventSize)
   158  	}
   159  
   160  	// Everything finished.
   161  	return uintptr(events), nil, nil
   162  }
   163  
   164  func waitForRequest(ctx *mm.AIOContext, t *kernel.Task, haveDeadline bool, deadline ktime.Time) (interface{}, error) {
   165  	for {
   166  		if v, ok := ctx.PopRequest(); ok {
   167  			// Request was readily available. Just return it.
   168  			return v, nil
   169  		}
   170  
   171  		// Need to wait for request completion.
   172  		done := ctx.WaitChannel()
   173  		if done == nil {
   174  			// Context has been destroyed.
   175  			return nil, linuxerr.EINVAL
   176  		}
   177  		if err := t.BlockWithDeadline(done, haveDeadline, deadline); err != nil {
   178  			return nil, err
   179  		}
   180  	}
   181  }
   182  
   183  // memoryFor returns appropriate memory for the given callback.
   184  func memoryFor(t *kernel.Task, cb *linux.IOCallback) (usermem.IOSequence, error) {
   185  	bytes := int(cb.Bytes)
   186  	if bytes < 0 {
   187  		// Linux also requires that this field fit in ssize_t.
   188  		return usermem.IOSequence{}, linuxerr.EINVAL
   189  	}
   190  
   191  	// Since this I/O will be asynchronous with respect to t's task goroutine,
   192  	// we have no guarantee that t's AddressSpace will be active during the
   193  	// I/O.
   194  	switch cb.OpCode {
   195  	case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PWRITE:
   196  		return t.SingleIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{
   197  			AddressSpaceActive: false,
   198  		})
   199  
   200  	case linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITEV:
   201  		return t.IovecsIOSequence(hostarch.Addr(cb.Buf), bytes, usermem.IOOpts{
   202  			AddressSpaceActive: false,
   203  		})
   204  
   205  	case linux.IOCB_CMD_FSYNC, linux.IOCB_CMD_FDSYNC, linux.IOCB_CMD_NOOP:
   206  		return usermem.IOSequence{}, nil
   207  
   208  	default:
   209  		// Not a supported command.
   210  		return usermem.IOSequence{}, linuxerr.EINVAL
   211  	}
   212  }
   213  
   214  // IoCancel implements linux syscall io_cancel(2).
   215  //
   216  // It is not presently supported (ENOSYS indicates no support on this
   217  // architecture).
   218  func IoCancel(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   219  	return 0, nil, syserror.ENOSYS
   220  }
   221  
   222  // LINT.IfChange
   223  
   224  func getAIOCallback(t *kernel.Task, file *fs.File, cbAddr hostarch.Addr, cb *linux.IOCallback, ioseq usermem.IOSequence, actx *mm.AIOContext, eventFile *fs.File) kernel.AIOCallback {
   225  	return func(ctx context.Context) {
   226  		if actx.Dead() {
   227  			actx.CancelPendingRequest()
   228  			return
   229  		}
   230  		ev := &linux.IOEvent{
   231  			Data: cb.Data,
   232  			Obj:  uint64(cbAddr),
   233  		}
   234  
   235  		var err error
   236  		switch cb.OpCode {
   237  		case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PREADV:
   238  			ev.Result, err = file.Preadv(ctx, ioseq, cb.Offset)
   239  		case linux.IOCB_CMD_PWRITE, linux.IOCB_CMD_PWRITEV:
   240  			ev.Result, err = file.Pwritev(ctx, ioseq, cb.Offset)
   241  		case linux.IOCB_CMD_FSYNC:
   242  			err = file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncAll)
   243  		case linux.IOCB_CMD_FDSYNC:
   244  			err = file.Fsync(ctx, 0, fs.FileMaxOffset, fs.SyncData)
   245  		}
   246  
   247  		// Update the result.
   248  		if err != nil {
   249  			err = handleIOError(t, ev.Result != 0 /* partial */, err, nil /* never interrupted */, "aio", file)
   250  			ev.Result = -int64(kernel.ExtractErrno(err, 0))
   251  		}
   252  
   253  		file.DecRef(ctx)
   254  
   255  		// Queue the result for delivery.
   256  		actx.FinishRequest(ev)
   257  
   258  		// Notify the event file if one was specified. This needs to happen
   259  		// *after* queueing the result to avoid racing with the thread we may
   260  		// wake up.
   261  		if eventFile != nil {
   262  			eventFile.FileOperations.(*eventfd.EventOperations).Signal(1)
   263  			eventFile.DecRef(ctx)
   264  		}
   265  	}
   266  }
   267  
   268  // submitCallback processes a single callback.
   269  func submitCallback(t *kernel.Task, id uint64, cb *linux.IOCallback, cbAddr hostarch.Addr) error {
   270  	file := t.GetFile(cb.FD)
   271  	if file == nil {
   272  		// File not found.
   273  		return linuxerr.EBADF
   274  	}
   275  	defer file.DecRef(t)
   276  
   277  	// Was there an eventFD? Extract it.
   278  	var eventFile *fs.File
   279  	if cb.Flags&linux.IOCB_FLAG_RESFD != 0 {
   280  		eventFile = t.GetFile(cb.ResFD)
   281  		if eventFile == nil {
   282  			// Bad FD.
   283  			return linuxerr.EBADF
   284  		}
   285  		defer eventFile.DecRef(t)
   286  
   287  		// Check that it is an eventfd.
   288  		if _, ok := eventFile.FileOperations.(*eventfd.EventOperations); !ok {
   289  			// Not an event FD.
   290  			return linuxerr.EINVAL
   291  		}
   292  	}
   293  
   294  	ioseq, err := memoryFor(t, cb)
   295  	if err != nil {
   296  		return err
   297  	}
   298  
   299  	// Check offset for reads/writes.
   300  	switch cb.OpCode {
   301  	case linux.IOCB_CMD_PREAD, linux.IOCB_CMD_PREADV, linux.IOCB_CMD_PWRITE, linux.IOCB_CMD_PWRITEV:
   302  		if cb.Offset < 0 {
   303  			return linuxerr.EINVAL
   304  		}
   305  	}
   306  
   307  	// Prepare the request.
   308  	ctx, ok := t.MemoryManager().LookupAIOContext(t, id)
   309  	if !ok {
   310  		return linuxerr.EINVAL
   311  	}
   312  	if err := ctx.Prepare(); err != nil {
   313  		return err
   314  	}
   315  
   316  	if eventFile != nil {
   317  		// The request is set. Make sure there's a ref on the file.
   318  		//
   319  		// This is necessary when the callback executes on completion,
   320  		// which is also what will release this reference.
   321  		eventFile.IncRef()
   322  	}
   323  
   324  	// Perform the request asynchronously.
   325  	file.IncRef()
   326  	t.QueueAIO(getAIOCallback(t, file, cbAddr, cb, ioseq, ctx, eventFile))
   327  
   328  	// All set.
   329  	return nil
   330  }
   331  
   332  // IoSubmit implements linux syscall io_submit(2).
   333  func IoSubmit(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
   334  	id := args[0].Uint64()
   335  	nrEvents := args[1].Int()
   336  	addr := args[2].Pointer()
   337  
   338  	if nrEvents < 0 {
   339  		return 0, nil, linuxerr.EINVAL
   340  	}
   341  
   342  	for i := int32(0); i < nrEvents; i++ {
   343  		// Copy in the callback address.
   344  		var cbAddr hostarch.Addr
   345  		switch t.Arch().Width() {
   346  		case 8:
   347  			var cbAddrP primitive.Uint64
   348  			if _, err := cbAddrP.CopyIn(t, addr); err != nil {
   349  				if i > 0 {
   350  					// Some successful.
   351  					return uintptr(i), nil, nil
   352  				}
   353  				// Nothing done.
   354  				return 0, nil, err
   355  			}
   356  			cbAddr = hostarch.Addr(cbAddrP)
   357  		default:
   358  			return 0, nil, syserror.ENOSYS
   359  		}
   360  
   361  		// Copy in this callback.
   362  		var cb linux.IOCallback
   363  		if _, err := cb.CopyIn(t, cbAddr); err != nil {
   364  
   365  			if i > 0 {
   366  				// Some have been successful.
   367  				return uintptr(i), nil, nil
   368  			}
   369  			// Nothing done.
   370  			return 0, nil, err
   371  		}
   372  
   373  		// Process this callback.
   374  		if err := submitCallback(t, id, &cb, cbAddr); err != nil {
   375  			if i > 0 {
   376  				// Partial success.
   377  				return uintptr(i), nil, nil
   378  			}
   379  			// Nothing done.
   380  			return 0, nil, err
   381  		}
   382  
   383  		// Advance to the next one.
   384  		addr += hostarch.Addr(t.Arch().Width())
   385  	}
   386  
   387  	return uintptr(nrEvents), nil, nil
   388  }
   389  
   390  // LINT.ThenChange(vfs2/aio.go)