github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/ringbuf/ring.go (about) 1 package ringbuf 2 3 import ( 4 "fmt" 5 "io" 6 "os" 7 "runtime" 8 "sync/atomic" 9 "unsafe" 10 11 "github.com/cilium/ebpf/internal" 12 "github.com/cilium/ebpf/internal/unix" 13 ) 14 15 type ringbufEventRing struct { 16 prod []byte 17 cons []byte 18 *ringReader 19 } 20 21 func newRingBufEventRing(mapFD, size int) (*ringbufEventRing, error) { 22 cons, err := unix.Mmap(mapFD, 0, os.Getpagesize(), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED) 23 if err != nil { 24 return nil, fmt.Errorf("can't mmap consumer page: %w", err) 25 } 26 27 prod, err := unix.Mmap(mapFD, (int64)(os.Getpagesize()), os.Getpagesize()+2*size, unix.PROT_READ, unix.MAP_SHARED) 28 if err != nil { 29 _ = unix.Munmap(cons) 30 return nil, fmt.Errorf("can't mmap data pages: %w", err) 31 } 32 33 cons_pos := (*uint64)(unsafe.Pointer(&cons[0])) 34 prod_pos := (*uint64)(unsafe.Pointer(&prod[0])) 35 36 ring := &ringbufEventRing{ 37 prod: prod, 38 cons: cons, 39 ringReader: newRingReader(cons_pos, prod_pos, prod[os.Getpagesize():]), 40 } 41 runtime.SetFinalizer(ring, (*ringbufEventRing).Close) 42 43 return ring, nil 44 } 45 46 func (ring *ringbufEventRing) Close() { 47 runtime.SetFinalizer(ring, nil) 48 49 _ = unix.Munmap(ring.prod) 50 _ = unix.Munmap(ring.cons) 51 52 ring.prod = nil 53 ring.cons = nil 54 } 55 56 type ringReader struct { 57 // These point into mmap'ed memory and must be accessed atomically. 58 prod_pos, cons_pos *uint64 59 mask uint64 60 ring []byte 61 } 62 63 func newRingReader(cons_ptr, prod_ptr *uint64, ring []byte) *ringReader { 64 return &ringReader{ 65 prod_pos: prod_ptr, 66 cons_pos: cons_ptr, 67 // cap is always a power of two 68 mask: uint64(cap(ring)/2 - 1), 69 ring: ring, 70 } 71 } 72 73 func (rr *ringReader) isEmpty() bool { 74 cons := atomic.LoadUint64(rr.cons_pos) 75 prod := atomic.LoadUint64(rr.prod_pos) 76 77 return prod == cons 78 } 79 80 func (rr *ringReader) size() int { 81 return cap(rr.ring) 82 } 83 84 // Read a record from an event ring. 85 func (rr *ringReader) readRecord(rec *Record) error { 86 prod := atomic.LoadUint64(rr.prod_pos) 87 cons := atomic.LoadUint64(rr.cons_pos) 88 89 for { 90 if remaining := prod - cons; remaining == 0 { 91 return errEOR 92 } else if remaining < unix.BPF_RINGBUF_HDR_SZ { 93 return fmt.Errorf("read record header: %w", io.ErrUnexpectedEOF) 94 } 95 96 // read the len field of the header atomically to ensure a happens before 97 // relationship with the xchg in the kernel. Without this we may see len 98 // without BPF_RINGBUF_BUSY_BIT before the written data is visible. 99 // See https://github.com/torvalds/linux/blob/v6.8/kernel/bpf/ringbuf.c#L484 100 start := cons & rr.mask 101 len := atomic.LoadUint32((*uint32)((unsafe.Pointer)(&rr.ring[start]))) 102 header := ringbufHeader{Len: len} 103 104 if header.isBusy() { 105 // the next sample in the ring is not committed yet so we 106 // exit without storing the reader/consumer position 107 // and start again from the same position. 108 return errBusy 109 } 110 111 cons += unix.BPF_RINGBUF_HDR_SZ 112 113 // Data is always padded to 8 byte alignment. 114 dataLenAligned := uint64(internal.Align(header.dataLen(), 8)) 115 if remaining := prod - cons; remaining < dataLenAligned { 116 return fmt.Errorf("read sample data: %w", io.ErrUnexpectedEOF) 117 } 118 119 start = cons & rr.mask 120 cons += dataLenAligned 121 122 if header.isDiscard() { 123 // when the record header indicates that the data should be 124 // discarded, we skip it by just updating the consumer position 125 // to the next record. 126 atomic.StoreUint64(rr.cons_pos, cons) 127 continue 128 } 129 130 if n := header.dataLen(); cap(rec.RawSample) < n { 131 rec.RawSample = make([]byte, n) 132 } else { 133 rec.RawSample = rec.RawSample[:n] 134 } 135 136 copy(rec.RawSample, rr.ring[start:]) 137 rec.Remaining = int(prod - cons) 138 atomic.StoreUint64(rr.cons_pos, cons) 139 return nil 140 } 141 }