github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/internal/epoll/poller.go

github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/internal/epoll/poller.go (about)

     1  package epoll
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"os"
     7  	"runtime"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/cilium/ebpf/internal"
    12  	"github.com/cilium/ebpf/internal/unix"
    13  )
    14  
// Poller waits for readiness notifications from multiple file descriptors.
//
// The wait can be interrupted by calling Close.
type Poller struct {
	// mutexes protect the fields declared below them. If you need to
	// acquire both at once you must lock epollMu before eventMu.

	// epollMu guards epollFd. Wait and Add hold it for their whole duration.
	epollMu sync.Mutex
	// epollFd is the epoll instance, or -1 once the Poller has been closed.
	epollFd int

	// eventMu guards event.
	eventMu sync.Mutex
	// event is the eventfd used to interrupt Wait, or nil once the Poller
	// has been closed.
	event *eventFd
}
    27  
    28  func New() (*Poller, error) {
    29  	epollFd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
    30  	if err != nil {
    31  		return nil, fmt.Errorf("create epoll fd: %v", err)
    32  	}
    33  
    34  	p := &Poller{epollFd: epollFd}
    35  	p.event, err = newEventFd()
    36  	if err != nil {
    37  		unix.Close(epollFd)
    38  		return nil, err
    39  	}
    40  
    41  	if err := p.Add(p.event.raw, 0); err != nil {
    42  		unix.Close(epollFd)
    43  		p.event.close()
    44  		return nil, fmt.Errorf("add eventfd: %w", err)
    45  	}
    46  
    47  	runtime.SetFinalizer(p, (*Poller).Close)
    48  	return p, nil
    49  }
    50  
    51  // Close the poller.
    52  //
    53  // Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent
    54  // calls will return os.ErrClosed.
    55  func (p *Poller) Close() error {
    56  	runtime.SetFinalizer(p, nil)
    57  
    58  	// Interrupt Wait() via the event fd if it's currently blocked.
    59  	if err := p.wakeWait(); err != nil {
    60  		return err
    61  	}
    62  
    63  	// Acquire the lock. This ensures that Wait isn't running.
    64  	p.epollMu.Lock()
    65  	defer p.epollMu.Unlock()
    66  
    67  	// Prevent other calls to Close().
    68  	p.eventMu.Lock()
    69  	defer p.eventMu.Unlock()
    70  
    71  	if p.epollFd != -1 {
    72  		unix.Close(p.epollFd)
    73  		p.epollFd = -1
    74  	}
    75  
    76  	if p.event != nil {
    77  		p.event.close()
    78  		p.event = nil
    79  	}
    80  
    81  	return nil
    82  }
    83  
    84  // Add an fd to the poller.
    85  //
    86  // id is returned by Wait in the unix.EpollEvent.Pad field any may be zero. It
    87  // must not exceed math.MaxInt32.
    88  //
    89  // Add is blocked by Wait.
    90  func (p *Poller) Add(fd int, id int) error {
    91  	if int64(id) > math.MaxInt32 {
    92  		return fmt.Errorf("unsupported id: %d", id)
    93  	}
    94  
    95  	p.epollMu.Lock()
    96  	defer p.epollMu.Unlock()
    97  
    98  	if p.epollFd == -1 {
    99  		return fmt.Errorf("epoll add: %w", os.ErrClosed)
   100  	}
   101  
   102  	// The representation of EpollEvent isn't entirely accurate.
   103  	// Pad is fully usable, not just padding. Hence we stuff the
   104  	// id in there, which allows us to identify the event later (e.g.,
   105  	// in case of perf events, which CPU sent it).
   106  	event := unix.EpollEvent{
   107  		Events: unix.EPOLLIN,
   108  		Fd:     int32(fd),
   109  		Pad:    int32(id),
   110  	}
   111  
   112  	if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil {
   113  		return fmt.Errorf("add fd to epoll: %v", err)
   114  	}
   115  
   116  	return nil
   117  }
   118  
   119  // Wait for events.
   120  //
   121  // Returns the number of pending events or an error wrapping os.ErrClosed if
   122  // Close is called, or os.ErrDeadlineExceeded if EpollWait timeout.
   123  func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) {
   124  	p.epollMu.Lock()
   125  	defer p.epollMu.Unlock()
   126  
   127  	if p.epollFd == -1 {
   128  		return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
   129  	}
   130  
   131  	for {
   132  		timeout := int(-1)
   133  		if !deadline.IsZero() {
   134  			msec := time.Until(deadline).Milliseconds()
   135  			// Deadline is in the past, don't block.
   136  			msec = max(msec, 0)
   137  			// Deadline is too far in the future.
   138  			msec = min(msec, math.MaxInt)
   139  
   140  			timeout = int(msec)
   141  		}
   142  
   143  		n, err := unix.EpollWait(p.epollFd, events, timeout)
   144  		if temp, ok := err.(temporaryError); ok && temp.Temporary() {
   145  			// Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400
   146  			continue
   147  		}
   148  
   149  		if err != nil {
   150  			return 0, err
   151  		}
   152  
   153  		if n == 0 {
   154  			return 0, fmt.Errorf("epoll wait: %w", os.ErrDeadlineExceeded)
   155  		}
   156  
   157  		for _, event := range events[:n] {
   158  			if int(event.Fd) == p.event.raw {
   159  				// Since we don't read p.event the event is never cleared and
   160  				// we'll keep getting this wakeup until Close() acquires the
   161  				// lock and sets p.epollFd = -1.
   162  				return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
   163  			}
   164  		}
   165  
   166  		return n, nil
   167  	}
   168  }
   169  
   170  type temporaryError interface {
   171  	Temporary() bool
   172  }
   173  
   174  // wakeWait unblocks Wait if it's epoll_wait.
   175  func (p *Poller) wakeWait() error {
   176  	p.eventMu.Lock()
   177  	defer p.eventMu.Unlock()
   178  
   179  	if p.event == nil {
   180  		return fmt.Errorf("epoll wake: %w", os.ErrClosed)
   181  	}
   182  
   183  	return p.event.add(1)
   184  }
   185  
// eventFd wraps a Linux eventfd.
//
// An eventfd acts like a counter: writes add to the counter, reads retrieve
// the counter and reset it to zero. Reads also block if the counter is zero.
//
// See man 2 eventfd.
type eventFd struct {
	// file wraps the descriptor and is used for reading, writing and closing.
	file *os.File
	// raw is the same descriptor as a plain integer.
	//
	// prefer raw over file.Fd(), since the latter puts the file into blocking
	// mode.
	raw int
}
   198  
   199  func newEventFd() (*eventFd, error) {
   200  	fd, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK)
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	file := os.NewFile(uintptr(fd), "event")
   205  	return &eventFd{file, fd}, nil
   206  }
   207  
   208  func (efd *eventFd) close() error {
   209  	return efd.file.Close()
   210  }
   211  
   212  func (efd *eventFd) add(n uint64) error {
   213  	var buf [8]byte
   214  	internal.NativeEndian.PutUint64(buf[:], n)
   215  	_, err := efd.file.Write(buf[:])
   216  	return err
   217  }
   218  
   219  func (efd *eventFd) read() (uint64, error) {
   220  	var buf [8]byte
   221  	_, err := efd.file.Read(buf[:])
   222  	return internal.NativeEndian.Uint64(buf[:]), err
   223  }