github.com/cilium/ebpf@v0.10.0/perf/ring.go

package perf

import (
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"runtime"
	"sync/atomic"
	"unsafe"

	"github.com/cilium/ebpf/internal/unix"
)

// perfEventRing is a page of metadata followed by
// a variable number of pages which form a ring buffer.
type perfEventRing struct {
	fd   int
	cpu  int
	mmap []byte
	*ringReader
}

// newPerfEventRing creates a ring buffer of at least perCPUBuffer bytes
// pinned to the given CPU. The underlying file descriptor becomes readable
// once watermark bytes of data are available.
func newPerfEventRing(cpu, perCPUBuffer, watermark int) (*perfEventRing, error) {
	if watermark >= perCPUBuffer {
		return nil, errors.New("watermark must be smaller than perCPUBuffer")
	}

	fd, err := createPerfEvent(cpu, watermark)
	if err != nil {
		return nil, err
	}

	if err := unix.SetNonblock(fd, true); err != nil {
		unix.Close(fd)
		return nil, err
	}

	mmap, err := unix.Mmap(fd, 0, perfBufferSize(perCPUBuffer), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED)
	if err != nil {
		unix.Close(fd)
		return nil, fmt.Errorf("can't mmap: %w", err)
	}

	// This relies on the fact that we allocate an extra metadata page,
	// and that the struct is smaller than an OS page.
	// This use of unsafe.Pointer isn't explicitly sanctioned by the
	// documentation, since a byte is smaller than sampledPerfEvent.
	meta := (*unix.PerfEventMmapPage)(unsafe.Pointer(&mmap[0]))

	ring := &perfEventRing{
		fd:         fd,
		cpu:        cpu,
		mmap:       mmap,
		ringReader: newRingReader(meta, mmap[meta.Data_offset:meta.Data_offset+meta.Data_size]),
	}
	runtime.SetFinalizer(ring, (*perfEventRing).Close)

	return ring, nil
}

// perfBufferSize returns a valid mmap buffer size for use with
// perf_event_open: one metadata page plus 2^n data pages. For example,
// with 4KiB pages a request for 5000 bytes needs two data pages, which
// is already a power of two, so the result is (2+1)*4096 = 12288 bytes.
func perfBufferSize(perCPUBuffer int) int {
	pageSize := os.Getpagesize()

	// Smallest whole number of pages
	nPages := (perCPUBuffer + pageSize - 1) / pageSize

	// Round up to nearest power of two number of pages
	nPages = int(math.Pow(2, math.Ceil(math.Log2(float64(nPages)))))

	// Add one for metadata
	nPages += 1

	return nPages * pageSize
}

func (ring *perfEventRing) Close() {
	runtime.SetFinalizer(ring, nil)

	_ = unix.Close(ring.fd)
	_ = unix.Munmap(ring.mmap)

	ring.fd = -1
	ring.mmap = nil
}

// createPerfEvent opens a BPF output perf event on the given CPU. A
// watermark of zero is treated as one byte, so that readers are woken
// up for every record.
func createPerfEvent(cpu, watermark int) (int, error) {
	if watermark == 0 {
		watermark = 1
	}

	attr := unix.PerfEventAttr{
		Type:        unix.PERF_TYPE_SOFTWARE,
		Config:      unix.PERF_COUNT_SW_BPF_OUTPUT,
		Bits:        unix.PerfBitWatermark,
		Sample_type: unix.PERF_SAMPLE_RAW,
		Wakeup:      uint32(watermark),
	}

	attr.Size = uint32(unsafe.Sizeof(attr))
	fd, err := unix.PerfEventOpen(&attr, -1, cpu, -1, unix.PERF_FLAG_FD_CLOEXEC)
	if err != nil {
		return -1, fmt.Errorf("can't create perf event: %w", err)
	}
	return fd, nil
}

// ringReader reads out of the memory-mapped data pages of a perf ring
// buffer, caching the producer (head) and consumer (tail) positions.
type ringReader struct {
	meta       *unix.PerfEventMmapPage
	head, tail uint64
	mask       uint64
	ring       []byte
}

func newRingReader(meta *unix.PerfEventMmapPage, ring []byte) *ringReader {
	return &ringReader{
		meta: meta,
		head: atomic.LoadUint64(&meta.Data_head),
		tail: atomic.LoadUint64(&meta.Data_tail),
		// cap is always a power of two
		mask: uint64(cap(ring) - 1),
		ring: ring,
	}
}

// loadHead refreshes the cached producer position from the shared
// metadata page.
func (rr *ringReader) loadHead() {
	rr.head = atomic.LoadUint64(&rr.meta.Data_head)
}

func (rr *ringReader) writeTail() {
	// Commit the new tail. This lets the kernel know that
	// the ring buffer has been consumed.
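	// The kernel compares Data_tail against Data_head to work out how
	// much free space the ring has; with a writable mapping it will not
	// overwrite records the reader has not consumed, so this store is
	// what releases space back to the producer.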
	atomic.StoreUint64(&rr.meta.Data_tail, rr.tail)
}

// Read copies as many bytes as fit into p from the ring, without wrapping
// past the end of the ring or reading past head. It returns io.EOF once
// the reader has caught up with the producer.
func (rr *ringReader) Read(p []byte) (int, error) {
	start := int(rr.tail & rr.mask)

	n := len(p)
	// Truncate if the read wraps in the ring buffer
	if remainder := cap(rr.ring) - start; n > remainder {
		n = remainder
	}

	// Truncate if there isn't enough data
	if remainder := int(rr.head - rr.tail); n > remainder {
		n = remainder
	}

	copy(p, rr.ring[start:start+n])
	rr.tail += uint64(n)

	if rr.tail == rr.head {
		return n, io.EOF
	}

	return n, nil
}
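// A minimal consumption sketch, assuming a single goroutine drives the
// reader: process is a hypothetical callback, and the CPU, buffer and
// watermark values are arbitrary.
//
//	ring, err := newPerfEventRing(0, 8192, 1)
//	if err != nil {
//		return err
//	}
//	defer ring.Close()
//
//	ring.loadHead() // snapshot the producer position once
//	buf := make([]byte, 4096)
//	for {
//		n, err := ring.Read(buf)
//		process(buf[:n]) // hypothetical consumer of the raw bytes
//		if err == io.EOF {
//			break // caught up with the producer
//		}
//	}
//	ring.writeTail() // release the consumed bytes back to the kernel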