github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/ringbuf/ring.go (about)

     1  package ringbuf
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"os"
     7  	"runtime"
     8  	"sync/atomic"
     9  	"unsafe"
    10  
    11  	"github.com/cilium/ebpf/internal"
    12  	"github.com/cilium/ebpf/internal/unix"
    13  )
    14  
// ringbufEventRing is the mmap'ed representation of a BPF ring buffer map:
// a read-write consumer page, a read-only producer/data mapping, and an
// embedded ringReader over the data area.
type ringbufEventRing struct {
	// prod is the read-only mapping of the producer page plus the data pages.
	prod []byte
	// cons is the read-write mapping of the consumer page.
	cons []byte
	*ringReader
}
    20  
    21  func newRingBufEventRing(mapFD, size int) (*ringbufEventRing, error) {
    22  	cons, err := unix.Mmap(mapFD, 0, os.Getpagesize(), unix.PROT_READ|unix.PROT_WRITE, unix.MAP_SHARED)
    23  	if err != nil {
    24  		return nil, fmt.Errorf("can't mmap consumer page: %w", err)
    25  	}
    26  
    27  	prod, err := unix.Mmap(mapFD, (int64)(os.Getpagesize()), os.Getpagesize()+2*size, unix.PROT_READ, unix.MAP_SHARED)
    28  	if err != nil {
    29  		_ = unix.Munmap(cons)
    30  		return nil, fmt.Errorf("can't mmap data pages: %w", err)
    31  	}
    32  
    33  	cons_pos := (*uint64)(unsafe.Pointer(&cons[0]))
    34  	prod_pos := (*uint64)(unsafe.Pointer(&prod[0]))
    35  
    36  	ring := &ringbufEventRing{
    37  		prod:       prod,
    38  		cons:       cons,
    39  		ringReader: newRingReader(cons_pos, prod_pos, prod[os.Getpagesize():]),
    40  	}
    41  	runtime.SetFinalizer(ring, (*ringbufEventRing).Close)
    42  
    43  	return ring, nil
    44  }
    45  
    46  func (ring *ringbufEventRing) Close() {
    47  	runtime.SetFinalizer(ring, nil)
    48  
    49  	_ = unix.Munmap(ring.prod)
    50  	_ = unix.Munmap(ring.cons)
    51  
    52  	ring.prod = nil
    53  	ring.cons = nil
    54  }
    55  
// ringReader reads records out of the data area of a BPF ring buffer.
type ringReader struct {
	// These point into mmap'ed memory and must be accessed atomically.
	prod_pos, cons_pos *uint64
	// mask converts a monotonically increasing position into an offset
	// into ring (the logical buffer size is a power of two).
	mask uint64
	// ring is the data area, mapped twice back to back; see
	// newRingBufEventRing.
	ring []byte
}
    62  
    63  func newRingReader(cons_ptr, prod_ptr *uint64, ring []byte) *ringReader {
    64  	return &ringReader{
    65  		prod_pos: prod_ptr,
    66  		cons_pos: cons_ptr,
    67  		// cap is always a power of two
    68  		mask: uint64(cap(ring)/2 - 1),
    69  		ring: ring,
    70  	}
    71  }
    72  
    73  func (rr *ringReader) isEmpty() bool {
    74  	cons := atomic.LoadUint64(rr.cons_pos)
    75  	prod := atomic.LoadUint64(rr.prod_pos)
    76  
    77  	return prod == cons
    78  }
    79  
// size returns the length of the mapped data area in bytes. Because the
// data pages are mapped twice, this is double the logical buffer size.
func (rr *ringReader) size() int {
	return cap(rr.ring)
}
    83  
    84  // Read a record from an event ring.
    85  func (rr *ringReader) readRecord(rec *Record) error {
    86  	prod := atomic.LoadUint64(rr.prod_pos)
    87  	cons := atomic.LoadUint64(rr.cons_pos)
    88  
    89  	for {
    90  		if remaining := prod - cons; remaining == 0 {
    91  			return errEOR
    92  		} else if remaining < unix.BPF_RINGBUF_HDR_SZ {
    93  			return fmt.Errorf("read record header: %w", io.ErrUnexpectedEOF)
    94  		}
    95  
    96  		// read the len field of the header atomically to ensure a happens before
    97  		// relationship with the xchg in the kernel. Without this we may see len
    98  		// without BPF_RINGBUF_BUSY_BIT before the written data is visible.
    99  		// See https://github.com/torvalds/linux/blob/v6.8/kernel/bpf/ringbuf.c#L484
   100  		start := cons & rr.mask
   101  		len := atomic.LoadUint32((*uint32)((unsafe.Pointer)(&rr.ring[start])))
   102  		header := ringbufHeader{Len: len}
   103  
   104  		if header.isBusy() {
   105  			// the next sample in the ring is not committed yet so we
   106  			// exit without storing the reader/consumer position
   107  			// and start again from the same position.
   108  			return errBusy
   109  		}
   110  
   111  		cons += unix.BPF_RINGBUF_HDR_SZ
   112  
   113  		// Data is always padded to 8 byte alignment.
   114  		dataLenAligned := uint64(internal.Align(header.dataLen(), 8))
   115  		if remaining := prod - cons; remaining < dataLenAligned {
   116  			return fmt.Errorf("read sample data: %w", io.ErrUnexpectedEOF)
   117  		}
   118  
   119  		start = cons & rr.mask
   120  		cons += dataLenAligned
   121  
   122  		if header.isDiscard() {
   123  			// when the record header indicates that the data should be
   124  			// discarded, we skip it by just updating the consumer position
   125  			// to the next record.
   126  			atomic.StoreUint64(rr.cons_pos, cons)
   127  			continue
   128  		}
   129  
   130  		if n := header.dataLen(); cap(rec.RawSample) < n {
   131  			rec.RawSample = make([]byte, n)
   132  		} else {
   133  			rec.RawSample = rec.RawSample[:n]
   134  		}
   135  
   136  		copy(rec.RawSample, rr.ring[start:])
   137  		rec.Remaining = int(prod - cons)
   138  		atomic.StoreUint64(rr.cons_pos, cons)
   139  		return nil
   140  	}
   141  }