github.com/cilium/ebpf@v0.10.0/ringbuf/reader.go (about)

     1  package ringbuf
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/cilium/ebpf"
    13  	"github.com/cilium/ebpf/internal"
    14  	"github.com/cilium/ebpf/internal/epoll"
    15  	"github.com/cilium/ebpf/internal/unix"
    16  )
    17  
    18  var (
    19  	ErrClosed  = os.ErrClosed
    20  	errEOR     = errors.New("end of ring")
    21  	errDiscard = errors.New("sample discarded")
    22  	errBusy    = errors.New("sample not committed yet")
    23  )
    24  
    25  var ringbufHeaderSize = binary.Size(ringbufHeader{})
    26  
    27  // ringbufHeader from 'struct bpf_ringbuf_hdr' in kernel/bpf/ringbuf.c
    28  type ringbufHeader struct {
    29  	Len   uint32
    30  	PgOff uint32
    31  }
    32  
    33  func (rh *ringbufHeader) isBusy() bool {
    34  	return rh.Len&unix.BPF_RINGBUF_BUSY_BIT != 0
    35  }
    36  
    37  func (rh *ringbufHeader) isDiscard() bool {
    38  	return rh.Len&unix.BPF_RINGBUF_DISCARD_BIT != 0
    39  }
    40  
    41  func (rh *ringbufHeader) dataLen() int {
    42  	return int(rh.Len & ^uint32(unix.BPF_RINGBUF_BUSY_BIT|unix.BPF_RINGBUF_DISCARD_BIT))
    43  }
    44  
    45  type Record struct {
    46  	RawSample []byte
    47  }
    48  
    49  // Read a record from an event ring.
    50  //
    51  // buf must be at least ringbufHeaderSize bytes long.
    52  func readRecord(rd *ringbufEventRing, rec *Record, buf []byte) error {
    53  	rd.loadConsumer()
    54  
    55  	buf = buf[:ringbufHeaderSize]
    56  	if _, err := io.ReadFull(rd, buf); err == io.EOF {
    57  		return errEOR
    58  	} else if err != nil {
    59  		return fmt.Errorf("read event header: %w", err)
    60  	}
    61  
    62  	header := ringbufHeader{
    63  		internal.NativeEndian.Uint32(buf[0:4]),
    64  		internal.NativeEndian.Uint32(buf[4:8]),
    65  	}
    66  
    67  	if header.isBusy() {
    68  		// the next sample in the ring is not committed yet so we
    69  		// exit without storing the reader/consumer position
    70  		// and start again from the same position.
    71  		return errBusy
    72  	}
    73  
    74  	/* read up to 8 byte alignment */
    75  	dataLenAligned := uint64(internal.Align(header.dataLen(), 8))
    76  
    77  	if header.isDiscard() {
    78  		// when the record header indicates that the data should be
    79  		// discarded, we skip it by just updating the consumer position
    80  		// to the next record instead of normal Read() to avoid allocating data
    81  		// and reading/copying from the ring (which normally keeps track of the
    82  		// consumer position).
    83  		rd.skipRead(dataLenAligned)
    84  		rd.storeConsumer()
    85  
    86  		return errDiscard
    87  	}
    88  
    89  	if cap(rec.RawSample) < int(dataLenAligned) {
    90  		rec.RawSample = make([]byte, dataLenAligned)
    91  	} else {
    92  		rec.RawSample = rec.RawSample[:dataLenAligned]
    93  	}
    94  
    95  	if _, err := io.ReadFull(rd, rec.RawSample); err != nil {
    96  		return fmt.Errorf("read sample: %w", err)
    97  	}
    98  
    99  	rd.storeConsumer()
   100  	rec.RawSample = rec.RawSample[:header.dataLen()]
   101  	return nil
   102  }
   103  
   104  // Reader allows reading bpf_ringbuf_output
   105  // from user space.
   106  type Reader struct {
   107  	poller *epoll.Poller
   108  
   109  	// mu protects read/write access to the Reader structure
   110  	mu          sync.Mutex
   111  	ring        *ringbufEventRing
   112  	epollEvents []unix.EpollEvent
   113  	header      []byte
   114  	haveData    bool
   115  	deadline    time.Time
   116  }
   117  
   118  // NewReader creates a new BPF ringbuf reader.
   119  func NewReader(ringbufMap *ebpf.Map) (*Reader, error) {
   120  	if ringbufMap.Type() != ebpf.RingBuf {
   121  		return nil, fmt.Errorf("invalid Map type: %s", ringbufMap.Type())
   122  	}
   123  
   124  	maxEntries := int(ringbufMap.MaxEntries())
   125  	if maxEntries == 0 || (maxEntries&(maxEntries-1)) != 0 {
   126  		return nil, fmt.Errorf("ringbuffer map size %d is zero or not a power of two", maxEntries)
   127  	}
   128  
   129  	poller, err := epoll.New()
   130  	if err != nil {
   131  		return nil, err
   132  	}
   133  
   134  	if err := poller.Add(ringbufMap.FD(), 0); err != nil {
   135  		poller.Close()
   136  		return nil, err
   137  	}
   138  
   139  	ring, err := newRingBufEventRing(ringbufMap.FD(), maxEntries)
   140  	if err != nil {
   141  		poller.Close()
   142  		return nil, fmt.Errorf("failed to create ringbuf ring: %w", err)
   143  	}
   144  
   145  	return &Reader{
   146  		poller:      poller,
   147  		ring:        ring,
   148  		epollEvents: make([]unix.EpollEvent, 1),
   149  		header:      make([]byte, ringbufHeaderSize),
   150  	}, nil
   151  }
   152  
   153  // Close frees resources used by the reader.
   154  //
   155  // It interrupts calls to Read.
   156  func (r *Reader) Close() error {
   157  	if err := r.poller.Close(); err != nil {
   158  		if errors.Is(err, os.ErrClosed) {
   159  			return nil
   160  		}
   161  		return err
   162  	}
   163  
   164  	// Acquire the lock. This ensures that Read isn't running.
   165  	r.mu.Lock()
   166  	defer r.mu.Unlock()
   167  
   168  	if r.ring != nil {
   169  		r.ring.Close()
   170  		r.ring = nil
   171  	}
   172  
   173  	return nil
   174  }
   175  
   176  // SetDeadline controls how long Read and ReadInto will block waiting for samples.
   177  //
   178  // Passing a zero time.Time will remove the deadline.
   179  func (r *Reader) SetDeadline(t time.Time) {
   180  	r.mu.Lock()
   181  	defer r.mu.Unlock()
   182  
   183  	r.deadline = t
   184  }
   185  
   186  // Read the next record from the BPF ringbuf.
   187  //
   188  // Returns os.ErrClosed if Close is called on the Reader, or os.ErrDeadlineExceeded
   189  // if a deadline was set.
   190  func (r *Reader) Read() (Record, error) {
   191  	var rec Record
   192  	return rec, r.ReadInto(&rec)
   193  }
   194  
   195  // ReadInto is like Read except that it allows reusing Record and associated buffers.
   196  func (r *Reader) ReadInto(rec *Record) error {
   197  	r.mu.Lock()
   198  	defer r.mu.Unlock()
   199  
   200  	if r.ring == nil {
   201  		return fmt.Errorf("ringbuffer: %w", ErrClosed)
   202  	}
   203  
   204  	for {
   205  		if !r.haveData {
   206  			_, err := r.poller.Wait(r.epollEvents[:cap(r.epollEvents)], r.deadline)
   207  			if err != nil {
   208  				return err
   209  			}
   210  			r.haveData = true
   211  		}
   212  
   213  		for {
   214  			err := readRecord(r.ring, rec, r.header)
   215  			if err == errBusy || err == errDiscard {
   216  				continue
   217  			}
   218  			if err == errEOR {
   219  				r.haveData = false
   220  				break
   221  			}
   222  
   223  			return err
   224  		}
   225  	}
   226  }