github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/internal/epoll/poller.go (about) 1 package epoll 2 3 import ( 4 "fmt" 5 "math" 6 "os" 7 "runtime" 8 "sync" 9 "time" 10 11 "github.com/cilium/ebpf/internal" 12 "github.com/cilium/ebpf/internal/unix" 13 ) 14 15 // Poller waits for readiness notifications from multiple file descriptors. 16 // 17 // The wait can be interrupted by calling Close. 18 type Poller struct { 19 // mutexes protect the fields declared below them. If you need to 20 // acquire both at once you must lock epollMu before eventMu. 21 epollMu sync.Mutex 22 epollFd int 23 24 eventMu sync.Mutex 25 event *eventFd 26 } 27 28 func New() (*Poller, error) { 29 epollFd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) 30 if err != nil { 31 return nil, fmt.Errorf("create epoll fd: %v", err) 32 } 33 34 p := &Poller{epollFd: epollFd} 35 p.event, err = newEventFd() 36 if err != nil { 37 unix.Close(epollFd) 38 return nil, err 39 } 40 41 if err := p.Add(p.event.raw, 0); err != nil { 42 unix.Close(epollFd) 43 p.event.close() 44 return nil, fmt.Errorf("add eventfd: %w", err) 45 } 46 47 runtime.SetFinalizer(p, (*Poller).Close) 48 return p, nil 49 } 50 51 // Close the poller. 52 // 53 // Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent 54 // calls will return os.ErrClosed. 55 func (p *Poller) Close() error { 56 runtime.SetFinalizer(p, nil) 57 58 // Interrupt Wait() via the event fd if it's currently blocked. 59 if err := p.wakeWait(); err != nil { 60 return err 61 } 62 63 // Acquire the lock. This ensures that Wait isn't running. 64 p.epollMu.Lock() 65 defer p.epollMu.Unlock() 66 67 // Prevent other calls to Close(). 68 p.eventMu.Lock() 69 defer p.eventMu.Unlock() 70 71 if p.epollFd != -1 { 72 unix.Close(p.epollFd) 73 p.epollFd = -1 74 } 75 76 if p.event != nil { 77 p.event.close() 78 p.event = nil 79 } 80 81 return nil 82 } 83 84 // Add an fd to the poller. 85 // 86 // id is returned by Wait in the unix.EpollEvent.Pad field any may be zero. It 87 // must not exceed math.MaxInt32. 88 // 89 // Add is blocked by Wait. 90 func (p *Poller) Add(fd int, id int) error { 91 if int64(id) > math.MaxInt32 { 92 return fmt.Errorf("unsupported id: %d", id) 93 } 94 95 p.epollMu.Lock() 96 defer p.epollMu.Unlock() 97 98 if p.epollFd == -1 { 99 return fmt.Errorf("epoll add: %w", os.ErrClosed) 100 } 101 102 // The representation of EpollEvent isn't entirely accurate. 103 // Pad is fully usable, not just padding. Hence we stuff the 104 // id in there, which allows us to identify the event later (e.g., 105 // in case of perf events, which CPU sent it). 106 event := unix.EpollEvent{ 107 Events: unix.EPOLLIN, 108 Fd: int32(fd), 109 Pad: int32(id), 110 } 111 112 if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil { 113 return fmt.Errorf("add fd to epoll: %v", err) 114 } 115 116 return nil 117 } 118 119 // Wait for events. 120 // 121 // Returns the number of pending events or an error wrapping os.ErrClosed if 122 // Close is called, or os.ErrDeadlineExceeded if EpollWait timeout. 123 func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) { 124 p.epollMu.Lock() 125 defer p.epollMu.Unlock() 126 127 if p.epollFd == -1 { 128 return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed) 129 } 130 131 for { 132 timeout := int(-1) 133 if !deadline.IsZero() { 134 msec := time.Until(deadline).Milliseconds() 135 // Deadline is in the past, don't block. 136 msec = max(msec, 0) 137 // Deadline is too far in the future. 138 msec = min(msec, math.MaxInt) 139 140 timeout = int(msec) 141 } 142 143 n, err := unix.EpollWait(p.epollFd, events, timeout) 144 if temp, ok := err.(temporaryError); ok && temp.Temporary() { 145 // Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400 146 continue 147 } 148 149 if err != nil { 150 return 0, err 151 } 152 153 if n == 0 { 154 return 0, fmt.Errorf("epoll wait: %w", os.ErrDeadlineExceeded) 155 } 156 157 for _, event := range events[:n] { 158 if int(event.Fd) == p.event.raw { 159 // Since we don't read p.event the event is never cleared and 160 // we'll keep getting this wakeup until Close() acquires the 161 // lock and sets p.epollFd = -1. 162 return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed) 163 } 164 } 165 166 return n, nil 167 } 168 } 169 170 type temporaryError interface { 171 Temporary() bool 172 } 173 174 // wakeWait unblocks Wait if it's epoll_wait. 175 func (p *Poller) wakeWait() error { 176 p.eventMu.Lock() 177 defer p.eventMu.Unlock() 178 179 if p.event == nil { 180 return fmt.Errorf("epoll wake: %w", os.ErrClosed) 181 } 182 183 return p.event.add(1) 184 } 185 186 // eventFd wraps a Linux eventfd. 187 // 188 // An eventfd acts like a counter: writes add to the counter, reads retrieve 189 // the counter and reset it to zero. Reads also block if the counter is zero. 190 // 191 // See man 2 eventfd. 192 type eventFd struct { 193 file *os.File 194 // prefer raw over file.Fd(), since the latter puts the file into blocking 195 // mode. 196 raw int 197 } 198 199 func newEventFd() (*eventFd, error) { 200 fd, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK) 201 if err != nil { 202 return nil, err 203 } 204 file := os.NewFile(uintptr(fd), "event") 205 return &eventFd{file, fd}, nil 206 } 207 208 func (efd *eventFd) close() error { 209 return efd.file.Close() 210 } 211 212 func (efd *eventFd) add(n uint64) error { 213 var buf [8]byte 214 internal.NativeEndian.PutUint64(buf[:], n) 215 _, err := efd.file.Write(buf[:]) 216 return err 217 } 218 219 func (efd *eventFd) read() (uint64, error) { 220 var buf [8]byte 221 _, err := efd.file.Read(buf[:]) 222 return internal.NativeEndian.Uint64(buf[:]), err 223 }