github.com/cilium/ebpf@v0.10.0/internal/epoll/poller.go (about) 1 package epoll 2 3 import ( 4 "fmt" 5 "math" 6 "os" 7 "runtime" 8 "sync" 9 "time" 10 11 "github.com/cilium/ebpf/internal" 12 "github.com/cilium/ebpf/internal/unix" 13 ) 14 15 // Poller waits for readiness notifications from multiple file descriptors. 16 // 17 // The wait can be interrupted by calling Close. 18 type Poller struct { 19 // mutexes protect the fields declared below them. If you need to 20 // acquire both at once you must lock epollMu before eventMu. 21 epollMu sync.Mutex 22 epollFd int 23 24 eventMu sync.Mutex 25 event *eventFd 26 } 27 28 func New() (*Poller, error) { 29 epollFd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) 30 if err != nil { 31 return nil, fmt.Errorf("create epoll fd: %v", err) 32 } 33 34 p := &Poller{epollFd: epollFd} 35 p.event, err = newEventFd() 36 if err != nil { 37 unix.Close(epollFd) 38 return nil, err 39 } 40 41 if err := p.Add(p.event.raw, 0); err != nil { 42 unix.Close(epollFd) 43 p.event.close() 44 return nil, fmt.Errorf("add eventfd: %w", err) 45 } 46 47 runtime.SetFinalizer(p, (*Poller).Close) 48 return p, nil 49 } 50 51 // Close the poller. 52 // 53 // Interrupts any calls to Wait. Multiple calls to Close are valid, but subsequent 54 // calls will return os.ErrClosed. 55 func (p *Poller) Close() error { 56 runtime.SetFinalizer(p, nil) 57 58 // Interrupt Wait() via the event fd if it's currently blocked. 59 if err := p.wakeWait(); err != nil { 60 return err 61 } 62 63 // Acquire the lock. This ensures that Wait isn't running. 64 p.epollMu.Lock() 65 defer p.epollMu.Unlock() 66 67 // Prevent other calls to Close(). 68 p.eventMu.Lock() 69 defer p.eventMu.Unlock() 70 71 if p.epollFd != -1 { 72 unix.Close(p.epollFd) 73 p.epollFd = -1 74 } 75 76 if p.event != nil { 77 p.event.close() 78 p.event = nil 79 } 80 81 return nil 82 } 83 84 // Add an fd to the poller. 85 // 86 // id is returned by Wait in the unix.EpollEvent.Pad field any may be zero. It 87 // must not exceed math.MaxInt32. 88 // 89 // Add is blocked by Wait. 90 func (p *Poller) Add(fd int, id int) error { 91 if int64(id) > math.MaxInt32 { 92 return fmt.Errorf("unsupported id: %d", id) 93 } 94 95 p.epollMu.Lock() 96 defer p.epollMu.Unlock() 97 98 if p.epollFd == -1 { 99 return fmt.Errorf("epoll add: %w", os.ErrClosed) 100 } 101 102 // The representation of EpollEvent isn't entirely accurate. 103 // Pad is fully useable, not just padding. Hence we stuff the 104 // id in there, which allows us to identify the event later (e.g., 105 // in case of perf events, which CPU sent it). 106 event := unix.EpollEvent{ 107 Events: unix.EPOLLIN, 108 Fd: int32(fd), 109 Pad: int32(id), 110 } 111 112 if err := unix.EpollCtl(p.epollFd, unix.EPOLL_CTL_ADD, fd, &event); err != nil { 113 return fmt.Errorf("add fd to epoll: %v", err) 114 } 115 116 return nil 117 } 118 119 // Wait for events. 120 // 121 // Returns the number of pending events or an error wrapping os.ErrClosed if 122 // Close is called, or os.ErrDeadlineExceeded if EpollWait timeout. 123 func (p *Poller) Wait(events []unix.EpollEvent, deadline time.Time) (int, error) { 124 p.epollMu.Lock() 125 defer p.epollMu.Unlock() 126 127 if p.epollFd == -1 { 128 return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed) 129 } 130 131 for { 132 timeout := int(-1) 133 if !deadline.IsZero() { 134 msec := time.Until(deadline).Milliseconds() 135 if msec < 0 { 136 // Deadline is in the past. 137 msec = 0 138 } else if msec > math.MaxInt { 139 // Deadline is too far in the future. 140 msec = math.MaxInt 141 } 142 timeout = int(msec) 143 } 144 145 n, err := unix.EpollWait(p.epollFd, events, timeout) 146 if temp, ok := err.(temporaryError); ok && temp.Temporary() { 147 // Retry the syscall if we were interrupted, see https://github.com/golang/go/issues/20400 148 continue 149 } 150 151 if err != nil { 152 return 0, err 153 } 154 155 if n == 0 { 156 return 0, fmt.Errorf("epoll wait: %w", os.ErrDeadlineExceeded) 157 } 158 159 for _, event := range events[:n] { 160 if int(event.Fd) == p.event.raw { 161 // Since we don't read p.event the event is never cleared and 162 // we'll keep getting this wakeup until Close() acquires the 163 // lock and sets p.epollFd = -1. 164 return 0, fmt.Errorf("epoll wait: %w", os.ErrClosed) 165 } 166 } 167 168 return n, nil 169 } 170 } 171 172 type temporaryError interface { 173 Temporary() bool 174 } 175 176 // waitWait unblocks Wait if it's epoll_wait. 177 func (p *Poller) wakeWait() error { 178 p.eventMu.Lock() 179 defer p.eventMu.Unlock() 180 181 if p.event == nil { 182 return fmt.Errorf("epoll wake: %w", os.ErrClosed) 183 } 184 185 return p.event.add(1) 186 } 187 188 // eventFd wraps a Linux eventfd. 189 // 190 // An eventfd acts like a counter: writes add to the counter, reads retrieve 191 // the counter and reset it to zero. Reads also block if the counter is zero. 192 // 193 // See man 2 eventfd. 194 type eventFd struct { 195 file *os.File 196 // prefer raw over file.Fd(), since the latter puts the file into blocking 197 // mode. 198 raw int 199 } 200 201 func newEventFd() (*eventFd, error) { 202 fd, err := unix.Eventfd(0, unix.O_CLOEXEC|unix.O_NONBLOCK) 203 if err != nil { 204 return nil, err 205 } 206 file := os.NewFile(uintptr(fd), "event") 207 return &eventFd{file, fd}, nil 208 } 209 210 func (efd *eventFd) close() error { 211 return efd.file.Close() 212 } 213 214 func (efd *eventFd) add(n uint64) error { 215 var buf [8]byte 216 internal.NativeEndian.PutUint64(buf[:], 1) 217 _, err := efd.file.Write(buf[:]) 218 return err 219 } 220 221 func (efd *eventFd) read() (uint64, error) { 222 var buf [8]byte 223 _, err := efd.file.Read(buf[:]) 224 return internal.NativeEndian.Uint64(buf[:]), err 225 }