gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/fsimpl/eventfd/eventfd.go (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package eventfd implements event fds.
    16  package eventfd
    17  
    18  import (
    19  	"fmt"
    20  	"math"
    21  	"sync"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"gvisor.dev/gvisor/pkg/abi/linux"
    25  	"gvisor.dev/gvisor/pkg/context"
    26  	"gvisor.dev/gvisor/pkg/errors/linuxerr"
    27  	"gvisor.dev/gvisor/pkg/fdnotifier"
    28  	"gvisor.dev/gvisor/pkg/hostarch"
    29  	"gvisor.dev/gvisor/pkg/log"
    30  	"gvisor.dev/gvisor/pkg/sentry/vfs"
    31  	"gvisor.dev/gvisor/pkg/usermem"
    32  	"gvisor.dev/gvisor/pkg/waiter"
    33  )
    34  
// EventFileDescription implements vfs.FileDescriptionImpl for file-based event
// notification (eventfd). Eventfds are usually internal to the Sentry but in
// certain situations they may be converted into a host-backed eventfd.
//
// +stateify savable
type EventFileDescription struct {
	vfsfd vfs.FileDescription
	vfs.FileDescriptionDefaultImpl
	vfs.DentryMetadataFileDescriptionImpl
	vfs.NoLockFD

	// queue is used to notify interested parties when the event object
	// becomes readable or writable.
	queue waiter.Queue

	// mu protects the fields below.
	mu sync.Mutex `state:"nosave"`

	// val is the current value of the event counter. Once hostfd is
	// non-negative, reads, writes and readiness checks are served by the host
	// fd and val is no longer updated.
	val uint64

	// semMode specifies whether the event is in "semaphore" mode. In
	// semaphore mode a read returns 1 and decrements val by one; otherwise a
	// read returns val and resets it to zero.
	semMode bool

	// hostfd is the host eventfd this event is passed through to, or a
	// negative value (-1) if the event is not host-backed.
	hostfd int
}
    62  
// Compile-time assertion that *EventFileDescription satisfies
// vfs.FileDescriptionImpl.
var _ vfs.FileDescriptionImpl = (*EventFileDescription)(nil)
    64  
    65  // New creates a new event fd.
    66  func New(ctx context.Context, vfsObj *vfs.VirtualFilesystem, initVal uint64, semMode bool, flags uint32) (*vfs.FileDescription, error) {
    67  	vd := vfsObj.NewAnonVirtualDentry("[eventfd]")
    68  	defer vd.DecRef(ctx)
    69  	efd := &EventFileDescription{
    70  		val:     initVal,
    71  		semMode: semMode,
    72  		hostfd:  -1,
    73  	}
    74  	if err := efd.vfsfd.Init(efd, flags, vd.Mount(), vd.Dentry(), &vfs.FileDescriptionOptions{
    75  		UseDentryMetadata: true,
    76  		DenyPRead:         true,
    77  		DenyPWrite:        true,
    78  		DenySpliceIn:      true,
    79  	}); err != nil {
    80  		return nil, err
    81  	}
    82  	return &efd.vfsfd, nil
    83  }
    84  
    85  // HostFD returns the host eventfd associated with this event.
    86  func (efd *EventFileDescription) HostFD() (int, error) {
    87  	efd.mu.Lock()
    88  	defer efd.mu.Unlock()
    89  	if efd.hostfd >= 0 {
    90  		return efd.hostfd, nil
    91  	}
    92  
    93  	flags := linux.EFD_NONBLOCK
    94  	if efd.semMode {
    95  		flags |= linux.EFD_SEMAPHORE
    96  	}
    97  
    98  	fd, _, errno := unix.Syscall(unix.SYS_EVENTFD2, uintptr(efd.val), uintptr(flags), 0)
    99  	if errno != 0 {
   100  		return -1, errno
   101  	}
   102  
   103  	if err := fdnotifier.AddFD(int32(fd), &efd.queue); err != nil {
   104  		if closeErr := unix.Close(int(fd)); closeErr != nil {
   105  			log.Warningf("close(%d) eventfd failed: %v", fd, closeErr)
   106  		}
   107  		return -1, err
   108  	}
   109  
   110  	efd.hostfd = int(fd)
   111  	return efd.hostfd, nil
   112  }
   113  
   114  // Release implements vfs.FileDescriptionImpl.Release.
   115  func (efd *EventFileDescription) Release(context.Context) {
   116  	efd.mu.Lock()
   117  	defer efd.mu.Unlock()
   118  	if efd.hostfd >= 0 {
   119  		fdnotifier.RemoveFD(int32(efd.hostfd))
   120  		if closeErr := unix.Close(int(efd.hostfd)); closeErr != nil {
   121  			log.Warningf("close(%d) eventfd failed: %v", efd.hostfd, closeErr)
   122  		}
   123  		efd.hostfd = -1
   124  	}
   125  }
   126  
   127  // Read implements vfs.FileDescriptionImpl.Read.
   128  func (efd *EventFileDescription) Read(ctx context.Context, dst usermem.IOSequence, _ vfs.ReadOptions) (int64, error) {
   129  	if dst.NumBytes() < 8 {
   130  		return 0, unix.EINVAL
   131  	}
   132  	if err := efd.read(ctx, dst); err != nil {
   133  		return 0, err
   134  	}
   135  	return 8, nil
   136  }
   137  
   138  // Write implements vfs.FileDescriptionImpl.Write.
   139  func (efd *EventFileDescription) Write(ctx context.Context, src usermem.IOSequence, _ vfs.WriteOptions) (int64, error) {
   140  	if src.NumBytes() < 8 {
   141  		return 0, unix.EINVAL
   142  	}
   143  	if err := efd.write(ctx, src); err != nil {
   144  		return 0, err
   145  	}
   146  	return 8, nil
   147  }
   148  
   149  // Preconditions: Must be called with efd.mu locked.
   150  func (efd *EventFileDescription) hostReadLocked(ctx context.Context, dst usermem.IOSequence) error {
   151  	var buf [8]byte
   152  	if _, err := unix.Read(efd.hostfd, buf[:]); err != nil {
   153  		if err == unix.EWOULDBLOCK {
   154  			return linuxerr.ErrWouldBlock
   155  		}
   156  		return err
   157  	}
   158  	_, err := dst.CopyOut(ctx, buf[:])
   159  	return err
   160  }
   161  
   162  func (efd *EventFileDescription) read(ctx context.Context, dst usermem.IOSequence) error {
   163  	efd.mu.Lock()
   164  	if efd.hostfd >= 0 {
   165  		defer efd.mu.Unlock()
   166  		return efd.hostReadLocked(ctx, dst)
   167  	}
   168  
   169  	// We can't complete the read if the value is currently zero.
   170  	if efd.val == 0 {
   171  		efd.mu.Unlock()
   172  		return linuxerr.ErrWouldBlock
   173  	}
   174  
   175  	// Update the value based on the mode the event is operating in.
   176  	var val uint64
   177  	if efd.semMode {
   178  		val = 1
   179  		// Consistent with Linux, this is done even if writing to memory fails.
   180  		efd.val--
   181  	} else {
   182  		val = efd.val
   183  		efd.val = 0
   184  	}
   185  
   186  	efd.mu.Unlock()
   187  
   188  	// Notify writers. We do this even if we were already writable because
   189  	// it is possible that a writer is waiting to write the maximum value
   190  	// to the event.
   191  	efd.queue.Notify(waiter.WritableEvents)
   192  
   193  	var buf [8]byte
   194  	hostarch.ByteOrder.PutUint64(buf[:], val)
   195  	_, err := dst.CopyOut(ctx, buf[:])
   196  	return err
   197  }
   198  
   199  // Preconditions: Must be called with efd.mu locked.
   200  func (efd *EventFileDescription) hostWriteLocked(val uint64) error {
   201  	var buf [8]byte
   202  	hostarch.ByteOrder.PutUint64(buf[:], val)
   203  	_, err := unix.Write(efd.hostfd, buf[:])
   204  	if err == unix.EWOULDBLOCK {
   205  		return linuxerr.ErrWouldBlock
   206  	}
   207  	return err
   208  }
   209  
   210  func (efd *EventFileDescription) write(ctx context.Context, src usermem.IOSequence) error {
   211  	var buf [8]byte
   212  	if _, err := src.CopyIn(ctx, buf[:]); err != nil {
   213  		return err
   214  	}
   215  	val := hostarch.ByteOrder.Uint64(buf[:])
   216  
   217  	return efd.Signal(val)
   218  }
   219  
   220  // Signal is an internal function to signal the event fd.
   221  func (efd *EventFileDescription) Signal(val uint64) error {
   222  	if val == math.MaxUint64 {
   223  		return unix.EINVAL
   224  	}
   225  
   226  	efd.mu.Lock()
   227  
   228  	if efd.hostfd >= 0 {
   229  		defer efd.mu.Unlock()
   230  		return efd.hostWriteLocked(val)
   231  	}
   232  
   233  	// We only allow writes that won't cause the value to go over the max
   234  	// uint64 minus 1.
   235  	if val > math.MaxUint64-1-efd.val {
   236  		efd.mu.Unlock()
   237  		return linuxerr.ErrWouldBlock
   238  	}
   239  
   240  	efd.val += val
   241  	efd.mu.Unlock()
   242  
   243  	// Always trigger a notification.
   244  	efd.queue.Notify(waiter.ReadableEvents)
   245  
   246  	return nil
   247  }
   248  
   249  // Readiness implements waiter.Waitable.Readiness.
   250  func (efd *EventFileDescription) Readiness(mask waiter.EventMask) waiter.EventMask {
   251  	efd.mu.Lock()
   252  	defer efd.mu.Unlock()
   253  
   254  	if efd.hostfd >= 0 {
   255  		return fdnotifier.NonBlockingPoll(int32(efd.hostfd), mask)
   256  	}
   257  
   258  	ready := waiter.EventMask(0)
   259  	if efd.val > 0 {
   260  		ready |= waiter.ReadableEvents
   261  	}
   262  
   263  	if efd.val < math.MaxUint64-1 {
   264  		ready |= waiter.WritableEvents
   265  	}
   266  
   267  	return mask & ready
   268  }
   269  
   270  // EventRegister implements waiter.Waitable.EventRegister.
   271  func (efd *EventFileDescription) EventRegister(entry *waiter.Entry) error {
   272  	efd.queue.EventRegister(entry)
   273  
   274  	efd.mu.Lock()
   275  	defer efd.mu.Unlock()
   276  	if efd.hostfd >= 0 {
   277  		if err := fdnotifier.UpdateFD(int32(efd.hostfd)); err != nil {
   278  			efd.queue.EventUnregister(entry)
   279  			return err
   280  		}
   281  	}
   282  	return nil
   283  }
   284  
   285  // EventUnregister implements waiter.Waitable.EventUnregister.
   286  func (efd *EventFileDescription) EventUnregister(entry *waiter.Entry) {
   287  	efd.queue.EventUnregister(entry)
   288  
   289  	efd.mu.Lock()
   290  	defer efd.mu.Unlock()
   291  	if efd.hostfd >= 0 {
   292  		if err := fdnotifier.UpdateFD(int32(efd.hostfd)); err != nil {
   293  			panic(fmt.Sprint("UpdateFD:", err))
   294  		}
   295  	}
   296  }
   297  
// Epollable implements vfs.FileDescriptionImpl.Epollable. It always reports
// true.
func (efd *EventFileDescription) Epollable() bool {
	return true
}