github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/kernel/eventfd/eventfd.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package eventfd provides an implementation of Linux's file-based event
    16  // notification.
    17  package eventfd
    18  
    19  import (
    20  	"math"
    21  
    22  	"golang.org/x/sys/unix"
    23  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    24  	"github.com/SagerNet/gvisor/pkg/context"
    25  	"github.com/SagerNet/gvisor/pkg/fdnotifier"
    26  	"github.com/SagerNet/gvisor/pkg/hostarch"
    27  	"github.com/SagerNet/gvisor/pkg/sentry/fs"
    28  	"github.com/SagerNet/gvisor/pkg/sentry/fs/anon"
    29  	"github.com/SagerNet/gvisor/pkg/sentry/fs/fsutil"
    30  	"github.com/SagerNet/gvisor/pkg/sync"
    31  	"github.com/SagerNet/gvisor/pkg/syserror"
    32  	"github.com/SagerNet/gvisor/pkg/usermem"
    33  	"github.com/SagerNet/gvisor/pkg/waiter"
    34  )
    35  
// EventOperations represents an event with the semantics of Linux's file-based event
// notification (eventfd). Eventfds are usually internal to the Sentry but in certain
// situations they may be converted into a host-backed eventfd (see HostFD).
//
// +stateify savable
type EventOperations struct {
	fsutil.FileNoopRelease          `state:"nosave"`
	fsutil.FilePipeSeek             `state:"nosave"`
	fsutil.FileNotDirReaddir        `state:"nosave"`
	fsutil.FileNoFsync              `state:"nosave"`
	fsutil.FileNoIoctl              `state:"nosave"`
	fsutil.FileNoMMap               `state:"nosave"`
	fsutil.FileNoSplice             `state:"nosave"`
	fsutil.FileNoopFlush            `state:"nosave"`
	fsutil.FileUseInodeUnstableAttr `state:"nosave"`

	// mu protects accesses to the fields of this event.
	mu sync.Mutex `state:"nosave"`

	// wq is used to notify interested parties when the event object
	// becomes readable or writable.
	wq waiter.Queue `state:"zerovalue"`

	// val is the current value of the event counter. It is only consulted
	// and updated while the event is sentry-internal (hostfd < 0); once a
	// host eventfd exists, reads and writes go to the host fd instead.
	val uint64

	// semMode specifies whether the event is in "semaphore" mode.
	semMode bool

	// hostfd is the host eventfd this event is passed through to, or -1
	// if the event has not been converted to a host-backed eventfd.
	hostfd int
}
    68  
    69  // New creates a new event object with the supplied initial value and mode.
    70  func New(ctx context.Context, initVal uint64, semMode bool) *fs.File {
    71  	// name matches fs/eventfd.c:eventfd_file_create.
    72  	dirent := fs.NewDirent(ctx, anon.NewInode(ctx), "anon_inode:[eventfd]")
    73  	// Release the initial dirent reference after NewFile takes a reference.
    74  	defer dirent.DecRef(ctx)
    75  	return fs.NewFile(ctx, dirent, fs.FileFlags{Read: true, Write: true}, &EventOperations{
    76  		val:     initVal,
    77  		semMode: semMode,
    78  		hostfd:  -1,
    79  	})
    80  }
    81  
    82  // HostFD returns the host eventfd associated with this event.
    83  func (e *EventOperations) HostFD() (int, error) {
    84  	e.mu.Lock()
    85  	defer e.mu.Unlock()
    86  	if e.hostfd >= 0 {
    87  		return e.hostfd, nil
    88  	}
    89  
    90  	flags := linux.EFD_NONBLOCK
    91  	if e.semMode {
    92  		flags |= linux.EFD_SEMAPHORE
    93  	}
    94  
    95  	fd, _, err := unix.Syscall(unix.SYS_EVENTFD2, uintptr(e.val), uintptr(flags), 0)
    96  	if err != 0 {
    97  		return -1, err
    98  	}
    99  
   100  	if err := fdnotifier.AddFD(int32(fd), &e.wq); err != nil {
   101  		unix.Close(int(fd))
   102  		return -1, err
   103  	}
   104  
   105  	e.hostfd = int(fd)
   106  	return e.hostfd, nil
   107  }
   108  
   109  // Release implements fs.FileOperations.Release.
   110  func (e *EventOperations) Release(context.Context) {
   111  	e.mu.Lock()
   112  	defer e.mu.Unlock()
   113  	if e.hostfd >= 0 {
   114  		fdnotifier.RemoveFD(int32(e.hostfd))
   115  		unix.Close(e.hostfd)
   116  		e.hostfd = -1
   117  	}
   118  }
   119  
   120  // Read implements fs.FileOperations.Read.
   121  func (e *EventOperations) Read(ctx context.Context, _ *fs.File, dst usermem.IOSequence, _ int64) (int64, error) {
   122  	if dst.NumBytes() < 8 {
   123  		return 0, unix.EINVAL
   124  	}
   125  	if err := e.read(ctx, dst); err != nil {
   126  		return 0, err
   127  	}
   128  	return 8, nil
   129  }
   130  
   131  // Write implements fs.FileOperations.Write.
   132  func (e *EventOperations) Write(ctx context.Context, _ *fs.File, src usermem.IOSequence, _ int64) (int64, error) {
   133  	if src.NumBytes() < 8 {
   134  		return 0, unix.EINVAL
   135  	}
   136  	if err := e.write(ctx, src); err != nil {
   137  		return 0, err
   138  	}
   139  	return 8, nil
   140  }
   141  
   142  // Must be called with e.mu locked.
   143  func (e *EventOperations) hostRead(ctx context.Context, dst usermem.IOSequence) error {
   144  	var buf [8]byte
   145  
   146  	if _, err := unix.Read(e.hostfd, buf[:]); err != nil {
   147  		if err == unix.EWOULDBLOCK {
   148  			return syserror.ErrWouldBlock
   149  		}
   150  		return err
   151  	}
   152  
   153  	_, err := dst.CopyOut(ctx, buf[:])
   154  	return err
   155  }
   156  
   157  func (e *EventOperations) read(ctx context.Context, dst usermem.IOSequence) error {
   158  	e.mu.Lock()
   159  
   160  	if e.hostfd >= 0 {
   161  		defer e.mu.Unlock()
   162  		return e.hostRead(ctx, dst)
   163  	}
   164  
   165  	// We can't complete the read if the value is currently zero.
   166  	if e.val == 0 {
   167  		e.mu.Unlock()
   168  		return syserror.ErrWouldBlock
   169  	}
   170  
   171  	// Update the value based on the mode the event is operating in.
   172  	var val uint64
   173  	if e.semMode {
   174  		val = 1
   175  		// Consistent with Linux, this is done even if writing to memory fails.
   176  		e.val--
   177  	} else {
   178  		val = e.val
   179  		e.val = 0
   180  	}
   181  
   182  	e.mu.Unlock()
   183  
   184  	// Notify writers. We do this even if we were already writable because
   185  	// it is possible that a writer is waiting to write the maximum value
   186  	// to the event.
   187  	e.wq.Notify(waiter.WritableEvents)
   188  
   189  	var buf [8]byte
   190  	hostarch.ByteOrder.PutUint64(buf[:], val)
   191  	_, err := dst.CopyOut(ctx, buf[:])
   192  	return err
   193  }
   194  
   195  // Must be called with e.mu locked.
   196  func (e *EventOperations) hostWrite(val uint64) error {
   197  	var buf [8]byte
   198  	hostarch.ByteOrder.PutUint64(buf[:], val)
   199  	_, err := unix.Write(e.hostfd, buf[:])
   200  	if err == unix.EWOULDBLOCK {
   201  		return syserror.ErrWouldBlock
   202  	}
   203  	return err
   204  }
   205  
   206  func (e *EventOperations) write(ctx context.Context, src usermem.IOSequence) error {
   207  	var buf [8]byte
   208  	if _, err := src.CopyIn(ctx, buf[:]); err != nil {
   209  		return err
   210  	}
   211  	val := hostarch.ByteOrder.Uint64(buf[:])
   212  
   213  	return e.Signal(val)
   214  }
   215  
   216  // Signal is an internal function to signal the event fd.
   217  func (e *EventOperations) Signal(val uint64) error {
   218  	if val == math.MaxUint64 {
   219  		return unix.EINVAL
   220  	}
   221  
   222  	e.mu.Lock()
   223  
   224  	if e.hostfd >= 0 {
   225  		defer e.mu.Unlock()
   226  		return e.hostWrite(val)
   227  	}
   228  
   229  	// We only allow writes that won't cause the value to go over the max
   230  	// uint64 minus 1.
   231  	if val > math.MaxUint64-1-e.val {
   232  		e.mu.Unlock()
   233  		return syserror.ErrWouldBlock
   234  	}
   235  
   236  	e.val += val
   237  	e.mu.Unlock()
   238  
   239  	// Always trigger a notification.
   240  	e.wq.Notify(waiter.ReadableEvents)
   241  
   242  	return nil
   243  }
   244  
   245  // Readiness returns the ready events for the event fd.
   246  func (e *EventOperations) Readiness(mask waiter.EventMask) waiter.EventMask {
   247  	e.mu.Lock()
   248  	if e.hostfd >= 0 {
   249  		defer e.mu.Unlock()
   250  		return fdnotifier.NonBlockingPoll(int32(e.hostfd), mask)
   251  	}
   252  
   253  	ready := waiter.EventMask(0)
   254  	if e.val > 0 {
   255  		ready |= waiter.ReadableEvents
   256  	}
   257  
   258  	if e.val < math.MaxUint64-1 {
   259  		ready |= waiter.WritableEvents
   260  	}
   261  	e.mu.Unlock()
   262  
   263  	return mask & ready
   264  }
   265  
   266  // EventRegister implements waiter.Waitable.EventRegister.
   267  func (e *EventOperations) EventRegister(entry *waiter.Entry, mask waiter.EventMask) {
   268  	e.wq.EventRegister(entry, mask)
   269  
   270  	e.mu.Lock()
   271  	defer e.mu.Unlock()
   272  	if e.hostfd >= 0 {
   273  		fdnotifier.UpdateFD(int32(e.hostfd))
   274  	}
   275  }
   276  
   277  // EventUnregister implements waiter.Waitable.EventUnregister.
   278  func (e *EventOperations) EventUnregister(entry *waiter.Entry) {
   279  	e.wq.EventUnregister(entry)
   280  
   281  	e.mu.Lock()
   282  	defer e.mu.Unlock()
   283  	if e.hostfd >= 0 {
   284  		fdnotifier.UpdateFD(int32(e.hostfd))
   285  	}
   286  }