// Copyright 2022 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package iouringfs provides a filesystem implementation for IO_URING,
// basing it on anonfs. Currently, we support neither IOPOLL nor SQPOLL modes.
// Thus, the user needs to set up IO_URING first with the io_uring_setup(2)
// syscall and then issue submission requests using io_uring_enter(2).
//
// Another important note: as of now, we don't support deferred CQEs. In other
// words, the size of the backlogged set of CQEs is zero. Whenever the
// completion queue ring buffer is full, we drop subsequent completion queue
// entries.
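//
// For orientation, the expected userspace flow is roughly the following (an
// illustrative sketch using the raw syscall numbers from golang.org/x/sys/unix,
// not an API provided by this package):
//
//	// Set up the rings once:
//	// ringFd, _, errno := unix.Syscall(unix.SYS_IO_URING_SETUP,
//	//	uintptr(entries), uintptr(unsafe.Pointer(&params)), 0)
//	//
//	// mmap the SQ/CQ rings (IORING_OFF_SQ_RING) and the SQE array
//	// (IORING_OFF_SQES) through ringFd, fill in SQEs, then submit:
//	// _, _, errno = unix.Syscall6(unix.SYS_IO_URING_ENTER,
//	//	ringFd, uintptr(toSubmit), uintptr(minComplete), uintptr(flags), 0, 0)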
package iouringfs

import (
	"fmt"
	"io"

	"github.com/ttpreport/gvisor-ligolo/pkg/abi/linux"
	"github.com/ttpreport/gvisor-ligolo/pkg/atomicbitops"
	"github.com/ttpreport/gvisor-ligolo/pkg/context"
	"github.com/ttpreport/gvisor-ligolo/pkg/errors/linuxerr"
	"github.com/ttpreport/gvisor-ligolo/pkg/hostarch"
	"github.com/ttpreport/gvisor-ligolo/pkg/safemem"
	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/kernel"
	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/memmap"
	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/pgalloc"
	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/usage"
	"github.com/ttpreport/gvisor-ligolo/pkg/sentry/vfs"
	"github.com/ttpreport/gvisor-ligolo/pkg/usermem"
)

// FileDescription implements vfs.FileDescriptionImpl for file-based IO_URING.
// It is based on the io_rings struct. See io_uring/io_uring.c.
//
// +stateify savable
type FileDescription struct {
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.DentryMetadataFileDescriptionImpl
	vfs.NoLockFD

	mfp pgalloc.MemoryFileProvider

	rbmf  ringsBufferFile
	sqemf sqEntriesFile

	// running indicates whether the submission queue is currently being
	// processed. This is either 0 for not running, or 1 for running.
	running atomicbitops.Uint32
	// runC is used to wake up serialized task goroutines waiting for any
	// concurrent processors of the submission queue.
	runC chan struct{} `state:"nosave"`

	ioRings linux.IORings

	ioRingsBuf sharedBuffer `state:"nosave"`
	sqesBuf    sharedBuffer `state:"nosave"`
	cqesBuf    sharedBuffer `state:"nosave"`

	// remap indicates whether the shared buffers need to be remapped
	// due to a save/restore (S/R). Protected by the ProcessSubmissions
	// critical section.
	remap bool
}

var _ vfs.FileDescriptionImpl = (*FileDescription)(nil)

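// roundUpPowerOfTwo returns the smallest power of two greater than or equal
// to n, and whether that value fits in a uint32. For example,
// roundUpPowerOfTwo(12) returns (16, true), while any n above 1<<31 returns
// (0, false) because the next power of two would overflow.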
func roundUpPowerOfTwo(n uint32) (uint32, bool) {
	if n > (1 << 31) {
		return 0, false
	}
	result := uint32(1)
	for result < n {
		result = result << 1
	}
	return result, true
}

// New creates a new iouring fd.
func New(ctx context.Context, vfsObj *vfs.VirtualFilesystem, entries uint32, params *linux.IOUringParams) (*vfs.FileDescription, error) {
	if entries > linux.IORING_MAX_ENTRIES {
		return nil, linuxerr.EINVAL
	}

	vd := vfsObj.NewAnonVirtualDentry("[io_uring]")
	defer vd.DecRef(ctx)

	mfp := pgalloc.MemoryFileProviderFromContext(ctx)
	if mfp == nil {
		panic(fmt.Sprintf("context.Context %T lacks non-nil value for key %T", ctx, pgalloc.CtxMemoryFileProvider))
	}

	numSqEntries, ok := roundUpPowerOfTwo(entries)
	if !ok {
		return nil, linuxerr.EOVERFLOW
	}
	var numCqEntries uint32
	if params.Flags&linux.IORING_SETUP_CQSIZE != 0 {
		var ok bool
		numCqEntries, ok = roundUpPowerOfTwo(params.CqEntries)
		if !ok || numCqEntries < numSqEntries || numCqEntries > linux.IORING_MAX_CQ_ENTRIES {
			return nil, linuxerr.EINVAL
		}
	} else {
		numCqEntries = 2 * numSqEntries
	}

	// Allocate enough space to store the `struct io_rings` plus the CQEs
	// array, plus a number of indexes corresponding to the number of SQEs.
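	//
	// The resulting rings buffer layout is (offsets as published below via
	// params.CqOff.Cqes and params.SqOff.Array):
	//
	//	+------------------+ offset 0
	//	| struct io_rings  |
	//	+------------------+ cache-line rounded up
	//	| CQEs array       |
	//	+------------------+ cache-line rounded up
	//	| SQ index array   |
	//	+------------------+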
	ioRingsWithCqesSize := uint32((*linux.IORings)(nil).SizeBytes()) +
		numCqEntries*uint32((*linux.IOUringCqe)(nil).SizeBytes())
	ringsBufferSize := uint64(ioRingsWithCqesSize +
		numSqEntries*uint32((*linux.IORingIndex)(nil).SizeBytes()))
	ringsBufferSize = uint64(hostarch.Addr(ringsBufferSize).MustRoundUp())

	mf := mfp.MemoryFile()
	memCgID := pgalloc.MemoryCgroupIDFromContext(ctx)
	rbfr, err := mf.Allocate(ringsBufferSize, pgalloc.AllocOpts{Kind: usage.Anonymous, MemCgID: memCgID})
	if err != nil {
		return nil, linuxerr.ENOMEM
	}

	// Allocate enough space to store the given number of submission queue entries.
	sqEntriesSize := uint64(numSqEntries * uint32((*linux.IOUringSqe)(nil).SizeBytes()))
	sqEntriesSize = uint64(hostarch.Addr(sqEntriesSize).MustRoundUp())
	sqefr, err := mf.Allocate(sqEntriesSize, pgalloc.AllocOpts{Kind: usage.Anonymous, MemCgID: memCgID})
	if err != nil {
		return nil, linuxerr.ENOMEM
	}

	iouringfd := &FileDescription{
		mfp: mfp,
		rbmf: ringsBufferFile{
			fr: rbfr,
		},
		sqemf: sqEntriesFile{
			fr: sqefr,
		},
		// See ProcessSubmissions for why the capacity is 1.
		runC: make(chan struct{}, 1),
	}

	// iouringfd is always set up with read/write mode.
	// See io_uring/io_uring.c:io_uring_install_fd().
	if err := iouringfd.vfsfd.Init(iouringfd, uint32(linux.O_RDWR), vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{
		UseDentryMetadata: true,
		DenyPRead:         true,
		DenyPWrite:        true,
		DenySpliceIn:      true,
	}); err != nil {
		return nil, err
	}

	params.SqEntries = numSqEntries
	params.CqEntries = numCqEntries

	arrayOffset := uint64(hostarch.Addr(ioRingsWithCqesSize))
	arrayOffset, ok = hostarch.CacheLineRoundUp(arrayOffset)
	if !ok {
		return nil, linuxerr.EOVERFLOW
	}

	params.SqOff = linux.PreComputedIOSqRingOffsets()
	params.SqOff.Array = uint32(arrayOffset)

	cqesOffset := uint64(hostarch.Addr((*linux.IORings)(nil).SizeBytes()))
	cqesOffset, ok = hostarch.CacheLineRoundUp(cqesOffset)
	if !ok {
		return nil, linuxerr.EOVERFLOW
	}

	params.CqOff = linux.PreComputedIOCqRingOffsets()
	params.CqOff.Cqes = uint32(cqesOffset)

	// Set features supported by the current IO_URING implementation.
	params.Features = linux.IORING_FEAT_SINGLE_MMAP

	// Map all shared buffers.
	if err := iouringfd.mapSharedBuffers(); err != nil {
		return nil, err
	}

	// Initialize IORings struct from params.
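	// Since both ring sizes are powers of two, size-1 works as a cheap
	// wrap-around mask: with 16 entries, for example, index = tail & 0xf
	// is equivalent to tail % 16.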
	iouringfd.ioRings.SqRingMask = params.SqEntries - 1
	iouringfd.ioRings.CqRingMask = params.CqEntries - 1
	iouringfd.ioRings.SqRingEntries = params.SqEntries
	iouringfd.ioRings.CqRingEntries = params.CqEntries

	// Write IORings out to shared buffer.
	view, err := iouringfd.ioRingsBuf.view(iouringfd.ioRings.SizeBytes())
	if err != nil {
		return nil, err
	}
	iouringfd.ioRings.MarshalUnsafe(view)

	if _, err := iouringfd.ioRingsBuf.writeback(iouringfd.ioRings.SizeBytes()); err != nil {
		return nil, err
	}

	return &iouringfd.vfsfd, nil
}

// Release implements vfs.FileDescriptionImpl.Release.
func (fd *FileDescription) Release(ctx context.Context) {
	mf := pgalloc.MemoryFileProviderFromContext(ctx).MemoryFile()
	mf.DecRef(fd.rbmf.fr)
	mf.DecRef(fd.sqemf.fr)
}

// mapSharedBuffers caches internal mappings for the ring's shared memory
// regions.
func (fd *FileDescription) mapSharedBuffers() error {
	mf := fd.mfp.MemoryFile()

	// Mapping for the IORings header struct.
	rb, err := mf.MapInternal(fd.rbmf.fr, hostarch.ReadWrite)
	if err != nil {
		return err
	}
	fd.ioRingsBuf.init(rb)

	// Mapping for the CQEs array. This is contiguous to the header struct.
	cqesOffset := uint64(fd.ioRings.SizeBytes())
	cqesOffset, ok := hostarch.CacheLineRoundUp(cqesOffset)
	if !ok {
		return linuxerr.EOVERFLOW
	}
	cqes := rb.DropFirst(int(cqesOffset))
	fd.cqesBuf.init(cqes)

	// Mapping for the SQEs array.
	sqes, err := mf.MapInternal(fd.sqemf.fr, hostarch.ReadWrite)
	if err != nil {
		return err
	}
	fd.sqesBuf.init(sqes)

	return nil
}

// ConfigureMMap implements vfs.FileDescriptionImpl.ConfigureMMap.
func (fd *FileDescription) ConfigureMMap(ctx context.Context, opts *memmap.MMapOpts) error {
	var mf memmap.Mappable
	switch opts.Offset {
	case linux.IORING_OFF_SQ_RING, linux.IORING_OFF_CQ_RING:
		mf = &fd.rbmf
	case linux.IORING_OFF_SQES:
		mf = &fd.sqemf
	default:
		return linuxerr.EINVAL
	}

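	// The mmap offset only selects which region to map; each region is
	// mapped from its own start, so reset the offset before handing off
	// to GenericConfigureMMap.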
	opts.Offset = 0

	return vfs.GenericConfigureMMap(&fd.vfsfd, mf, opts)
}

// ProcessSubmissions processes the submission queue. Concurrent calls to
// ProcessSubmissions serialize, yielding task goroutines with Task.Block since
// processing can take a long time.
func (fd *FileDescription) ProcessSubmissions(t *kernel.Task, toSubmit uint32, minComplete uint32, flags uint32) (int, error) {
	// We use a combination of fd.running and fd.runC to serialize concurrent
	// callers to ProcessSubmissions. runC has a capacity of 1. The protocol
	// works as follows:
	//
	// * Becoming the active task
	//
	// On entry to ProcessSubmissions, we try to transition running from 0 to
	// 1. If there is already an active task, this will fail and we'll go to
	// sleep with Task.Block(). If we succeed, we're the active task.
	//
	// * Sleep, Wakeup
	//
	// If we had to sleep, on wakeup we try to transition running to 1 again as
	// we could still be racing with other tasks. Note that if multiple tasks
	// are sleeping, only one will wake up since only one will successfully
	// receive from runC. However, we could still race with a new caller of
	// ProcessSubmissions that hasn't gone to sleep yet. Only one waiting task
	// will succeed and become the active task, the rest will go to sleep.
	//
	// runC needs to be buffered to avoid a race between checking running and
	// going back to sleep. With an unbuffered channel, we could miss a wakeup
	// like this:
	//
	// Task B (entering, sleeping)                        | Task A (active, releasing)
	// ---------------------------------------------------+-------------------------
	// for !fd.running.CompareAndSwap(0, 1) { // Fails    |
	//                                                    | fd.running.Store(0)
	//                                                    | nonblockingSend(runC) // Missed!
	//     t.Block(fd.runC) // Will block forever         |
	// }
	//
	// Task A's send would have to be non-blocking, as there may not be a
	// concurrent Task B.
	//
	// A side-effect of using a buffered channel is that the first task that
	// needs to sleep may wake up once immediately due to a previously queued
	// wakeup. This isn't a problem, as it'll immediately try to transition
	// running to 1, likely fail again and go back to sleep. Task.Block has a
	// fast path if runC already has a queued message so this won't result in a
	// task state change.
	//
	// * Release
	//
	// When the active task is done, it releases the critical section by setting
	// running = 0, then doing a non-blocking send on runC. The send needs to be
	// non-blocking, as there may not be a concurrent sleeper.
	for !fd.running.CompareAndSwap(0, 1) {
		t.Block(fd.runC)
	}
	// We successfully set fd.running, so we're the active task now.
	defer func() {
		// Unblock any potentially waiting tasks.
		if !fd.running.CompareAndSwap(1, 0) {
			panic(fmt.Sprintf("iouringfs.FileDescription.ProcessSubmissions: active task encountered invalid fd.running state %v", fd.running.Load()))
		}
		select {
		case fd.runC <- struct{}{}:
		default:
		}
	}()

	// The rest of this function is a critical section with respect to
	// concurrent callers.

	if fd.remap {
		if err := fd.mapSharedBuffers(); err != nil {
			return -1, err
		}
		fd.remap = false
	}

	var err error
	var sqe linux.IOUringSqe

	sqOff := linux.PreComputedIOSqRingOffsets()
	cqOff := linux.PreComputedIOCqRingOffsets()
	sqArraySize := sqe.SizeBytes() * int(fd.ioRings.SqRingEntries)
	cqArraySize := (*linux.IOUringCqe)(nil).SizeBytes() * int(fd.ioRings.CqRingEntries)

	// Fetch all buffers initially.
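	// After this first pass, each fetch* flag tracks whether the
	// corresponding view must be re-fetched on the next iteration, as
	// reported by the sharedBuffer drop/writeback calls below.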
	fetchRB := true
	fetchSQA := true
	fetchCQA := true

	var view, sqaView, cqaView []byte
	submitted := uint32(0)

	for toSubmit > submitted {
		// This loop can take a long time to process, so periodically check for
		// interrupts. This also pets the watchdog.
		if t.Interrupted() {
			return -1, linuxerr.EINTR
		}

		if fetchRB {
			view, err = fd.ioRingsBuf.view(fd.ioRings.SizeBytes())
			if err != nil {
				return -1, err
			}
		}

		// Note: The kernel uses sqHead as a cursor and writes cqTail. Userspace
		// uses cqHead as a cursor and writes sqTail.

		sqHeadPtr := atomicUint32AtOffset(view, int(sqOff.Head))
		sqTailPtr := atomicUint32AtOffset(view, int(sqOff.Tail))
		cqHeadPtr := atomicUint32AtOffset(view, int(cqOff.Head))
		cqTailPtr := atomicUint32AtOffset(view, int(cqOff.Tail))
		overflowPtr := atomicUint32AtOffset(view, int(cqOff.Overflow))

		// Load the pointers once, so we work with a stable value. Particularly,
		// userspace can update the SQ tail at any time.
		sqHead := sqHeadPtr.Load()
		sqTail := sqTailPtr.Load()

		// Is the submission queue empty?
		if sqHead == sqTail {
			return int(submitted), nil
		}

		// We have at least one pending sqe, unmarshal the first one from the
		// submission queue.
		if fetchSQA {
			sqaView, err = fd.sqesBuf.view(sqArraySize)
			if err != nil {
				return -1, err
			}
		}
		sqaOff := int(sqHead&fd.ioRings.SqRingMask) * sqe.SizeBytes()
		sqe.UnmarshalUnsafe(sqaView[sqaOff : sqaOff+sqe.SizeBytes()])
		fetchSQA = fd.sqesBuf.drop()

		// Dispatch request from unmarshalled entry.
		cqe := fd.ProcessSubmission(t, &sqe, flags)

		// Advance sq head.
		sqHeadPtr.Add(1)

		// Load once so we have stable values. Particularly, userspace can
		// update the CQ head at any time.
		cqHead := cqHeadPtr.Load()
		cqTail := cqTailPtr.Load()

		// Marshal response to completion queue.
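		// Unsigned subtraction keeps this check correct across tail
		// wrap-around: e.g. cqHead = 0xffffffff and cqTail = 2 yields 3
		// entries in flight.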
		if (cqTail - cqHead) >= fd.ioRings.CqRingEntries {
			// CQ ring full.
			fd.ioRings.CqOverflow++
			overflowPtr.Store(fd.ioRings.CqOverflow)
		} else {
			// Have room in CQ, marshal CQE.
			if fetchCQA {
				cqaView, err = fd.cqesBuf.view(cqArraySize)
				if err != nil {
					return -1, err
				}
			}
			cqaOff := int(cqTail&fd.ioRings.CqRingMask) * cqe.SizeBytes()
			cqe.MarshalUnsafe(cqaView[cqaOff : cqaOff+cqe.SizeBytes()])
			fetchCQA, err = fd.cqesBuf.writebackWindow(cqaOff, cqe.SizeBytes())
			if err != nil {
				return -1, err
			}

			// Advance cq tail.
			cqTailPtr.Add(1)
		}

		fetchRB, err = fd.ioRingsBuf.writeback(fd.ioRings.SizeBytes())
		if err != nil {
			return -1, err
		}

		submitted++
	}

	return int(submitted), nil
}

// ProcessSubmission processes a single submission request.
func (fd *FileDescription) ProcessSubmission(t *kernel.Task, sqe *linux.IOUringSqe, flags uint32) *linux.IOUringCqe {
	var (
		cqeErr   error
		cqeFlags uint32
		retValue int32
	)

	switch op := sqe.Opcode; op {
	case linux.IORING_OP_NOP:
		// For the NOP operation, we don't do anything special.
	case linux.IORING_OP_READV:
		retValue, cqeErr = fd.handleReadv(t, sqe, flags)
		if cqeErr == io.EOF {
			// Don't raise EOF as errno, error translation will fail. Short
			// reads aren't failures.
			cqeErr = nil
		}
	default: // Unsupported operation
		retValue = -int32(linuxerr.EINVAL.Errno())
	}

	if cqeErr != nil {
		retValue = -int32(kernel.ExtractErrno(cqeErr, -1))
	}

	return &linux.IOUringCqe{
		UserData: sqe.UserData,
		Res:      retValue,
		Flags:    cqeFlags,
	}
}

// handleReadv handles IORING_OP_READV.
func (fd *FileDescription) handleReadv(t *kernel.Task, sqe *linux.IOUringSqe, flags uint32) (int32, error) {
	// Check that the file descriptor is valid.
	if sqe.Fd < 0 {
		return 0, linuxerr.EBADF
	}
	// Currently we don't support any flags for the SQEs.
	if sqe.Flags != 0 {
		return 0, linuxerr.EINVAL
	}
	// If the file is not seekable then the offset must be zero. And currently,
	// we don't support non-zero offsets.
	if sqe.OffOrAddrOrCmdOp != 0 {
		return 0, linuxerr.EINVAL
	}
	// ioprio should not be set for the READV operation.
	if sqe.IoPrio != 0 {
		return 0, linuxerr.EINVAL
	}

	// AddressSpaceActive is set to true as we are doing this from the task
	// goroutine, and this is the case because we support neither IOPOLL nor
	// SQPOLL modes.
	dst, err := t.IovecsIOSequence(hostarch.Addr(sqe.AddrOrSpliceOff), int(sqe.Len), usermem.IOOpts{
		AddressSpaceActive: true,
	})
	if err != nil {
		return 0, err
	}
	file := t.GetFile(sqe.Fd)
	if file == nil {
		return 0, linuxerr.EBADF
	}
	defer file.DecRef(t)
	n, err := file.PRead(t, dst, 0, vfs.ReadOptions{})
	if err != nil {
		return 0, err
	}

	return int32(n), nil
}

// updateCq updates a completion queue by adding a given completion queue entry.
func (fd *FileDescription) updateCq(cqes *safemem.BlockSeq, cqe *linux.IOUringCqe, cqTail uint32) error {
	cqeSize := uint32((*linux.IOUringCqe)(nil).SizeBytes())
	if cqes.NumBlocks() == 1 && !cqes.Head().NeedSafecopy() {
		cqe.MarshalBytes(cqes.Head().ToSlice()[cqTail*cqeSize : (cqTail+1)*cqeSize])

		return nil
	}

	// Buffer a single CQE and safe-copy it into place; copying more than
	// cqeSize bytes would clobber the entries that follow.
	buf := make([]byte, cqeSize)
	cqe.MarshalBytes(buf)
	cp, cperr := safemem.CopySeq(cqes.DropFirst64(uint64(cqTail*cqeSize)), safemem.BlockSeqOf(safemem.BlockFromSafeSlice(buf)))
	if cp == 0 {
		return cperr
	}

	return nil
}

// sqEntriesFile implements memmap.Mappable for SQ entries.
//
// +stateify savable
type sqEntriesFile struct {
	fr memmap.FileRange
}

// AddMapping implements memmap.Mappable.AddMapping.
func (sqemf *sqEntriesFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
	return nil
}

// RemoveMapping implements memmap.Mappable.RemoveMapping.
func (sqemf *sqEntriesFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
}

// CopyMapping implements memmap.Mappable.CopyMapping.
func (sqemf *sqEntriesFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
	return nil
}

// Translate implements memmap.Mappable.Translate.
func (sqemf *sqEntriesFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
	if required.End > sqemf.fr.Length() {
		return nil, &memmap.BusError{linuxerr.EFAULT}
	}

	if source := optional.Intersect(memmap.MappableRange{0, sqemf.fr.Length()}); source.Length() != 0 {
		return []memmap.Translation{
			{
				Source: source,
				File:   pgalloc.MemoryFileProviderFromContext(ctx).MemoryFile(),
				Offset: sqemf.fr.Start + source.Start,
				Perms:  at,
			},
		}, nil
	}

	return nil, linuxerr.EFAULT
}

// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
func (sqemf *sqEntriesFile) InvalidateUnsavable(ctx context.Context) error {
	return nil
}

// ringsBufferFile implements memmap.Mappable for SQ and CQ ring buffers.
//
// +stateify savable
type ringsBufferFile struct {
	fr memmap.FileRange
}

// AddMapping implements memmap.Mappable.AddMapping.
func (rbmf *ringsBufferFile) AddMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) error {
	return nil
}

// RemoveMapping implements memmap.Mappable.RemoveMapping.
func (rbmf *ringsBufferFile) RemoveMapping(ctx context.Context, ms memmap.MappingSpace, ar hostarch.AddrRange, offset uint64, writable bool) {
}

// CopyMapping implements memmap.Mappable.CopyMapping.
func (rbmf *ringsBufferFile) CopyMapping(ctx context.Context, ms memmap.MappingSpace, srcAR, dstAR hostarch.AddrRange, offset uint64, writable bool) error {
	return nil
}

// Translate implements memmap.Mappable.Translate.
func (rbmf *ringsBufferFile) Translate(ctx context.Context, required, optional memmap.MappableRange, at hostarch.AccessType) ([]memmap.Translation, error) {
	if required.End > rbmf.fr.Length() {
		return nil, &memmap.BusError{linuxerr.EFAULT}
	}

	if source := optional.Intersect(memmap.MappableRange{0, rbmf.fr.Length()}); source.Length() != 0 {
		return []memmap.Translation{
			{
				Source: source,
				File:   pgalloc.MemoryFileProviderFromContext(ctx).MemoryFile(),
				Offset: rbmf.fr.Start + source.Start,
				Perms:  at,
			},
		}, nil
	}

	return nil, linuxerr.EFAULT
}

// InvalidateUnsavable implements memmap.Mappable.InvalidateUnsavable.
func (rbmf *ringsBufferFile) InvalidateUnsavable(ctx context.Context) error {
	return nil
}