github.com/cilium/ebpf@v0.16.0/internal/epoll/poller.go (about)

     1  package epoll
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"math"
     7  	"os"
     8  	"runtime"
     9  	"slices"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/cilium/ebpf/internal"
    14  	"github.com/cilium/ebpf/internal/unix"
    15  )
    16  
// ErrFlushed is returned by Poller.Wait when the wait was interrupted by a
// call to Poller.Flush.
var ErrFlushed = errors.New("data was flushed")
    18  
// Poller waits for readiness notifications from multiple file descriptors.
//
// The wait can be interrupted by calling Close.
type Poller struct {
	// mutexes protect the fields declared below them. If you need to
	// acquire both at once you must lock epollMu before eventMu.
	epollMu sync.Mutex
	// epollFd is the epoll instance's file descriptor. Close sets it to -1,
	// which Add and Wait treat as "closed".
	epollFd int

	eventMu    sync.Mutex
	// closeEvent is written to by Close to wake a Wait that is blocked in
	// epoll_wait. It is set to nil once the poller has been closed.
	closeEvent *eventFd
	// flushEvent is written to by Flush to make Wait return early with
	// ErrFlushed. It is set to nil once the poller has been closed.
	flushEvent *eventFd
}
    32  
    33  func New() (_ *Poller, err error) {
    34  	closeFDOnError := func(fd int) {
    35  		if err != nil {
    36  			unix.Close(fd)
    37  		}
    38  	}
    39  	closeEventFDOnError := func(e *eventFd) {
    40  		if err != nil {
    41  			e.close()
    42  		}
    43  	}
    44  
    45  	epollFd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
    46  	if err != nil {
    47  		return nil, fmt.Errorf("create epoll fd: %v", err)
    48  	}
    49  	defer closeFDOnError(epollFd)
    50  
    51  	p := &Poller{epollFd: epollFd}
    52  	p.closeEvent, err = newEventFd()
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  	defer closeEventFDOnError(p.closeEvent)
    57  
    58  	p.flushEvent, err = newEventFd()
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  	defer closeEventFDOnError(p.flushEvent)
    63  
    64  	if err := p.Add(p.closeEvent.raw, 0); err != nil {
    65  		return nil, fmt.Errorf("add close eventfd: %w", err)
    66  	}
    67  
    68  	if err := p.Add(p.flushEvent.raw, 0); err != nil {
    69  		return nil, fmt.Errorf("add flush eventfd: %w", err)
    70  	}
    71  
    72  	runtime.SetFinalizer(p, (*Poller).Close)
    73  	return p, nil
    74  }
    75  
    76  // Close the poller.
    77  //
    78  // Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent
    79  // calls will return os.ErrClosed.
    80  func (p *Poller) Close() error {
    81  	runtime.SetFinalizer(p, nil)
    82  
    83  	// Interrupt Wait() via the closeEvent fd if it's currently blocked.
    84  	if err := p.wakeWaitForClose(); err != nil {
    85  		return err
    86  	}
    87  
    88  	// Acquire the lock. This ensures that Wait isn't running.
    89  	p.epollMu.Lock()
    90  	defer p.epollMu.Unlock()
    91  
    92  	// Prevent other calls to Close().
    93  	p.eventMu.Lock()
    94  	defer p.eventMu.Unlock()
    95  
    96  	if p.epollFd != -1 {
    97  		unix.Close(p.epollFd)
    98  		p.epollFd = -1
    99  	}
   100  
   101  	if p.closeEvent != nil {
   102  		p.closeEvent.close()
   103  		p.closeEvent = nil
   104  	}
   105  
   106  	if p.flushEvent != nil {
   107  		p.flushEvent.close()
   108  		p.flushEvent = nil
   109  	}
   110  
   111  	return nil
   112  }
   113  
   114  // Add an fd to the poller.
   115  //
   116  // id is returned by Wait in the unix.EpollEvent.Pad field any may be zero. It
   117  // must not exceed math.MaxInt32.
   118  //
   119  // Add is blocked by Wait.
   120  func (p *Poller) Add(fd int, id int) error {
   121  	if int64(id) > math.MaxInt32 {
   122  		return fmt.Errorf("unsupported id: %d", id)
   123  	}
   124  
   125  	p.epollMu.Lock()
   126  	defer p.epollMu.Unlock()
   127  
   128  	if p.epollFd == -1 {
   129  		return fmt.Errorf("epoll add: %w", os.ErrClosed)
   130  	}
   131  
   132  	// The representation of EpollEvent isn't entirely accurate.
   133  	// Pad is fully usable, not just padding. Hence we stuff the
   134  	// id in there, which allows us to identify the event later (e.g.,
   135  	// in case of perf events, which CPU sent it).
   136  	event := unix.EpollEvent{
   137  		Events: unix.EPOLLIN,
   138  		Fd:     int32(fd),
   139  		Pad:    int32(id),
   140  	}
   141  
   142  	if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil {
   143  		return fmt.Errorf("add fd to epoll: %v", err)
   144  	}
   145  
   146  	return nil
   147  }
   148  
   149  // Wait for events.
   150  //
   151  // Returns the number of pending events and any errors.
   152  //
   153  //   - [os.ErrClosed] if interrupted by [Close].
   154  //   - [ErrFlushed] if interrupted by [Flush].
   155  //   - [os.ErrDeadlineExceeded] if deadline is reached.
   156  func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) {
   157  	p.epollMu.Lock()
   158  	defer p.epollMu.Unlock()
   159  
   160  	if p.epollFd == -1 {
   161  		return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
   162  	}
   163  
   164  	for {
   165  		timeout := int(-1)
   166  		if !deadline.IsZero() {
   167  			msec := time.Until(deadline).Milliseconds()
   168  			// Deadline is in the past, don't block.
   169  			msec = max(msec, 0)
   170  			// Deadline is too far in the future.
   171  			msec = min(msec, math.MaxInt)
   172  
   173  			timeout = int(msec)
   174  		}
   175  
   176  		n, err := unix.EpollWait(p.epollFd, events, timeout)
   177  		if temp, ok := err.(temporaryError); ok && temp.Temporary() {
   178  			// Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400
   179  			continue
   180  		}
   181  
   182  		if err != nil {
   183  			return 0, err
   184  		}
   185  
   186  		if n == 0 {
   187  			return 0, fmt.Errorf("epoll wait: %w", os.ErrDeadlineExceeded)
   188  		}
   189  
   190  		for i := 0; i < n; {
   191  			event := events[i]
   192  			if int(event.Fd) == p.closeEvent.raw {
   193  				// Since we don't read p.closeEvent the event is never cleared and
   194  				// we'll keep getting this wakeup until Close() acquires the
   195  				// lock and sets p.epollFd = -1.
   196  				return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
   197  			}
   198  			if int(event.Fd) == p.flushEvent.raw {
   199  				// read event to prevent it from continuing to wake
   200  				p.flushEvent.read()
   201  				err = ErrFlushed
   202  				events = slices.Delete(events, i, i+1)
   203  				n -= 1
   204  				continue
   205  			}
   206  			i++
   207  		}
   208  
   209  		return n, err
   210  	}
   211  }
   212  
// temporaryError is implemented by errors that can report whether they are
// temporary. Wait uses it to decide whether to retry epoll_wait.
type temporaryError interface {
	Temporary() bool
}
   216  
   217  // wakeWaitForClose unblocks Wait if it's epoll_wait.
   218  func (p *Poller) wakeWaitForClose() error {
   219  	p.eventMu.Lock()
   220  	defer p.eventMu.Unlock()
   221  
   222  	if p.closeEvent == nil {
   223  		return fmt.Errorf("epoll wake: %w", os.ErrClosed)
   224  	}
   225  
   226  	return p.closeEvent.add(1)
   227  }
   228  
   229  // Flush unblocks Wait if it's epoll_wait, for purposes of reading pending samples
   230  func (p *Poller) Flush() error {
   231  	p.eventMu.Lock()
   232  	defer p.eventMu.Unlock()
   233  
   234  	if p.flushEvent == nil {
   235  		return fmt.Errorf("epoll wake: %w", os.ErrClosed)
   236  	}
   237  
   238  	return p.flushEvent.add(1)
   239  }
   240  
// eventFd wraps a Linux eventfd.
//
// An eventfd acts like a counter: writes add to the counter, reads retrieve
// the counter and reset it to zero. Reads also block if the counter is zero.
//
// See man 2 eventfd.
type eventFd struct {
	// file wraps the eventfd and is used for reading, writing and closing it.
	file *os.File
	// prefer raw over file.Fd(), since the latter puts the file into blocking
	// mode.
	raw int
}
   253  
   254  func newEventFd() (*eventFd, error) {
   255  	fd, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK)
   256  	if err != nil {
   257  		return nil, err
   258  	}
   259  	file := os.NewFile(uintptr(fd), "event")
   260  	return &eventFd{file, fd}, nil
   261  }
   262  
// close closes the file wrapping the eventfd and returns the result of
// that Close call.
func (efd *eventFd) close() error {
	return efd.file.Close()
}
   266  
   267  func (efd *eventFd) add(n uint64) error {
   268  	var buf [8]byte
   269  	internal.NativeEndian.PutUint64(buf[:], n)
   270  	_, err := efd.file.Write(buf[:])
   271  	return err
   272  }
   273  
   274  func (efd *eventFd) read() (uint64, error) {
   275  	var buf [8]byte
   276  	_, err := efd.file.Read(buf[:])
   277  	return internal.NativeEndian.Uint64(buf[:]), err
   278  }