github.com/cilium/ebpf@v0.10.0/internal/epoll/poller.go (about)

     1  package epoll
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"os"
     7  	"runtime"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/cilium/ebpf/internal"
    12  	"github.com/cilium/ebpf/internal/unix"
    13  )
    14  
// Poller waits for readiness notifications from multiple file descriptors.
//
// The wait can be interrupted by calling Close.
type Poller struct {
	// mutexes protect the fields declared below them. If you need to
	// acquire both at once you must lock epollMu before eventMu.
	epollMu sync.Mutex
	// epollFd is the epoll file descriptor. Close sets it to -1, which Add
	// and Wait treat as "poller closed".
	epollFd int

	eventMu sync.Mutex
	// event is the eventfd registered with the epoll instance so that Close
	// can interrupt a blocked Wait. Close sets it to nil.
	event   *eventFd
}
    27  
    28  func New() (*Poller, error) {
    29  	epollFd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
    30  	if err != nil {
    31  		return nil, fmt.Errorf("create epoll fd: %v", err)
    32  	}
    33  
    34  	p := &Poller{epollFd: epollFd}
    35  	p.event, err = newEventFd()
    36  	if err != nil {
    37  		unix.Close(epollFd)
    38  		return nil, err
    39  	}
    40  
    41  	if err := p.Add(p.event.raw, 0); err != nil {
    42  		unix.Close(epollFd)
    43  		p.event.close()
    44  		return nil, fmt.Errorf("add eventfd: %w", err)
    45  	}
    46  
    47  	runtime.SetFinalizer(p, (*Poller).Close)
    48  	return p, nil
    49  }
    50  
    51  // Close the poller.
    52  //
    53  // Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent
    54  // calls will return os.ErrClosed.
    55  func (p *Poller) Close() error {
    56  	runtime.SetFinalizer(p, nil)
    57  
    58  	// Interrupt Wait() via the event fd if it's currently blocked.
    59  	if err := p.wakeWait(); err != nil {
    60  		return err
    61  	}
    62  
    63  	// Acquire the lock. This ensures that Wait isn't running.
    64  	p.epollMu.Lock()
    65  	defer p.epollMu.Unlock()
    66  
    67  	// Prevent other calls to Close().
    68  	p.eventMu.Lock()
    69  	defer p.eventMu.Unlock()
    70  
    71  	if p.epollFd != -1 {
    72  		unix.Close(p.epollFd)
    73  		p.epollFd = -1
    74  	}
    75  
    76  	if p.event != nil {
    77  		p.event.close()
    78  		p.event = nil
    79  	}
    80  
    81  	return nil
    82  }
    83  
    84  // Add an fd to the poller.
    85  //
    86  // id is returned by Wait in the unix.EpollEvent.Pad field any may be zero. It
    87  // must not exceed math.MaxInt32.
    88  //
    89  // Add is blocked by Wait.
    90  func (p *Poller) Add(fd int, id int) error {
    91  	if int64(id) > math.MaxInt32 {
    92  		return fmt.Errorf("unsupported id: %d", id)
    93  	}
    94  
    95  	p.epollMu.Lock()
    96  	defer p.epollMu.Unlock()
    97  
    98  	if p.epollFd == -1 {
    99  		return fmt.Errorf("epoll add: %w", os.ErrClosed)
   100  	}
   101  
   102  	// The representation of EpollEvent isn't entirely accurate.
   103  	// Pad is fully useable, not just padding. Hence we stuff the
   104  	// id in there, which allows us to identify the event later (e.g.,
   105  	// in case of perf events, which CPU sent it).
   106  	event := unix.EpollEvent{
   107  		Events: unix.EPOLLIN,
   108  		Fd:     int32(fd),
   109  		Pad:    int32(id),
   110  	}
   111  
   112  	if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil {
   113  		return fmt.Errorf("add fd to epoll: %v", err)
   114  	}
   115  
   116  	return nil
   117  }
   118  
   119  // Wait for events.
   120  //
   121  // Returns the number of pending events or an error wrapping os.ErrClosed if
   122  // Close is called, or os.ErrDeadlineExceeded if EpollWait timeout.
   123  func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) {
   124  	p.epollMu.Lock()
   125  	defer p.epollMu.Unlock()
   126  
   127  	if p.epollFd == -1 {
   128  		return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
   129  	}
   130  
   131  	for {
   132  		timeout := int(-1)
   133  		if !deadline.IsZero() {
   134  			msec := time.Until(deadline).Milliseconds()
   135  			if msec < 0 {
   136  				// Deadline is in the past.
   137  				msec = 0
   138  			} else if msec > math.MaxInt {
   139  				// Deadline is too far in the future.
   140  				msec = math.MaxInt
   141  			}
   142  			timeout = int(msec)
   143  		}
   144  
   145  		n, err := unix.EpollWait(p.epollFd, events, timeout)
   146  		if temp, ok := err.(temporaryError); ok && temp.Temporary() {
   147  			// Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400
   148  			continue
   149  		}
   150  
   151  		if err != nil {
   152  			return 0, err
   153  		}
   154  
   155  		if n == 0 {
   156  			return 0, fmt.Errorf("epoll wait: %w", os.ErrDeadlineExceeded)
   157  		}
   158  
   159  		for _, event := range events[:n] {
   160  			if int(event.Fd) == p.event.raw {
   161  				// Since we don't read p.event the event is never cleared and
   162  				// we'll keep getting this wakeup until Close() acquires the
   163  				// lock and sets p.epollFd = -1.
   164  				return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed)
   165  			}
   166  		}
   167  
   168  		return n, nil
   169  	}
   170  }
   171  
// temporaryError is implemented by errors that can report whether they are
// temporary. Wait uses it to decide whether a failed EpollWait call should be
// retried.
type temporaryError interface {
	Temporary() bool
}
   175  
   176  // waitWait unblocks Wait if it's epoll_wait.
   177  func (p *Poller) wakeWait() error {
   178  	p.eventMu.Lock()
   179  	defer p.eventMu.Unlock()
   180  
   181  	if p.event == nil {
   182  		return fmt.Errorf("epoll wake: %w", os.ErrClosed)
   183  	}
   184  
   185  	return p.event.add(1)
   186  }
   187  
// eventFd wraps a Linux eventfd.
//
// An eventfd acts like a counter: writes add to the counter, reads retrieve
// the counter and reset it to zero. Reads also block if the counter is zero.
//
// See man 2 eventfd.
type eventFd struct {
	// file wraps the eventfd descriptor and is used for reading, writing
	// and closing it.
	file *os.File
	// prefer raw over file.Fd(), since the latter puts the file into blocking
	// mode.
	raw int
}
   200  
   201  func newEventFd() (*eventFd, error) {
   202  	fd, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK)
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  	file := os.NewFile(uintptr(fd), "event")
   207  	return &eventFd{file, fd}, nil
   208  }
   209  
   210  func (efd *eventFd) close() error {
   211  	return efd.file.Close()
   212  }
   213  
   214  func (efd *eventFd) add(n uint64) error {
   215  	var buf [8]byte
   216  	internal.NativeEndian.PutUint64(buf[:], 1)
   217  	_, err := efd.file.Write(buf[:])
   218  	return err
   219  }
   220  
   221  func (efd *eventFd) read() (uint64, error) {
   222  	var buf [8]byte
   223  	_, err := efd.file.Read(buf[:])
   224  	return internal.NativeEndian.Uint64(buf[:]), err
   225  }