github.com/imran-kn/cilium-fork@v1.6.9/pkg/bpf/perf_linux.go

// Copyright 2016-2018 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build linux

package bpf

/*
#cgo CFLAGS: -I../../bpf/include
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <linux/unistd.h>
#include <linux/bpf.h>
#include <linux/perf_event.h>
#include <sys/resource.h>
#include <stdlib.h>

#define READ_ONCE(x)		(*(volatile typeof(x) *)&x)
#define WRITE_ONCE(x, v)	(*(volatile typeof(x) *)&x) = (v)

// Contract with kernel/user space on the perf ring buffer. From
// kernel/events/ring_buffer.c:
//
//   kernel                             user
//
//   if (LOAD ->data_tail) {            LOAD ->data_head
//                      (A)             smp_rmb()       (C)
//      STORE $data                     LOAD $data
//      smp_wmb()       (B)             smp_mb()        (D)
//      STORE ->data_head               STORE ->data_tail
//   }
//
// Where A pairs with D, and B pairs with C.
//
// In our case (A) is a control dependency that separates the load
// of ->data_tail from the stores of $data: if ->data_tail indicates
// there is no room in the buffer to store $data, the store simply
// does not happen. D needs to be a full barrier since it separates
// the data READ from the tail WRITE. For B a WMB is sufficient
// since it separates two WRITEs, and for C an RMB is sufficient
// since it separates two READs.

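// On x86-64 the hardware memory model is strong enough (TSO: loads are
// not reordered with older loads, stores are not reordered with older
// stores) that the acquire/release pairs below only need a compiler
// barrier. Other architectures fall back to __sync_synchronize(), which
// emits a full memory barrier.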
#if defined(__x86_64__)
# define barrier()				\
	asm volatile("" ::: "memory")

# define smp_store_release(p, v)		\
do {						\
	barrier();				\
	WRITE_ONCE(*p, v);			\
} while (0)

# define smp_load_acquire(p)			\
({						\
	typeof(*p) ___p1 = READ_ONCE(*p);	\
	barrier();				\
	___p1;					\
})

static inline uint64_t perf_read_head(struct perf_event_mmap_page *up)
{
	return smp_load_acquire(&up->data_head);
}

static inline void perf_write_tail(struct perf_event_mmap_page *up,
				   uint64_t data_tail)
{
	smp_store_release(&up->data_tail, data_tail);
}
#else
# define smp_mb()	__sync_synchronize()
# define smp_rmb()	__sync_synchronize()

static inline uint64_t perf_read_head(struct perf_event_mmap_page *up)
{
	uint64_t data_head = READ_ONCE(up->data_head);

	smp_rmb();
	return data_head;
}

static inline void perf_write_tail(struct perf_event_mmap_page *up,
				   uint64_t data_tail)
{
	smp_mb();
	WRITE_ONCE(up->data_tail, data_tail);
}
#endif

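// create_perf_event_attr zeroes the caller-provided perf_event_attr and
// fills in the event type/config and sampling parameters. sample_period
// is fixed at 1 so that every BPF output event is sampled.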
void create_perf_event_attr(int type, int config, int sample_type,
			    int wakeup_events, void *attr)
{
	struct perf_event_attr *ptr = attr;

	memset(ptr, 0, sizeof(*ptr));

	ptr->type = type;
	ptr->config = config;
	ptr->sample_type = sample_type;
	ptr->sample_period = 1;
	ptr->wakeup_events = wakeup_events;
}

static void dump_data(uint8_t *data, size_t size, int cpu)
{
	size_t i;

	printf("event on cpu%d: ", cpu);
	for (i = 0; i < size; i++)
		printf("%02x ", data[i]);
	printf("\n");
}

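// event_sample mirrors the layout of a PERF_RECORD_SAMPLE record with
// PERF_SAMPLE_RAW: the generic header, the raw data size, then the
// payload. read_state tracks the reader's position between calls to
// perf_event_read().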
struct event_sample {
	struct perf_event_header header;
	uint32_t size;
	uint8_t data[];
};

struct read_state {
	void *base;
	uint64_t raw_size;
	uint64_t last_size;
};

void perf_event_reset_tail(void *_page)
{
	struct perf_event_mmap_page *up = _page;
	uint64_t data_head = perf_read_head(up);

	// Reset the tail to a known-good state, i.e. tell the kernel
	// we have consumed all data.
	perf_write_tail(up, data_head);
}

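// perf_event_read_init prepares state for a sequence of perf_event_read()
// calls. Returns 0 if the ring buffer is empty (head == tail), 1 if there
// is data to consume.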
int perf_event_read_init(int page_count, int page_size, void *_page,
			 void *_state)
{
	struct perf_event_mmap_page *up = _page;
	struct read_state *state = _state;
	uint64_t data_tail = up->data_tail;

	if (perf_read_head(up) == data_tail)
		return 0;

	state->raw_size = page_count * page_size;
	state->base = ((uint8_t *)up) + page_size;
	state->last_size = 0;

	return 1;
}

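// perf_event_read consumes one record from the ring buffer. It first
// publishes the previous record as consumed (advancing data_tail), then
// reads the next one, copying it into *_buf if it wraps around the end
// of the ring. Returns 0 when the ring is drained, 1 on success, and 2
// if the record had to be truncated because a larger buffer could not
// be allocated. *_msg always points at the record; *_sample or *_lost
// is additionally set depending on the record type.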
int perf_event_read(int page_size, void *_page, void *_state,
		    void *_buf, void *_msg, void *_sample, void *_lost)
{
	struct perf_event_mmap_page *up = _page;
	struct read_state *state = _state;
	void **sample = (void **) _sample;
	void **lost = (void **) _lost;
	void **msg = (void **) _msg;
	void **buf = (void **) _buf;
	uint64_t e_size, data_tail;
	struct event_sample *e;
	int trunc = 0;
	void *begin;

	data_tail = up->data_tail + state->last_size;
	perf_write_tail(up, data_tail);
	if (perf_read_head(up) == data_tail)
		return 0;

	// raw_size is guaranteed to be a power of 2
	e = begin = state->base + (data_tail & (state->raw_size - 1));
	e_size = state->last_size = e->header.size;
	if (begin + e_size > state->base + state->raw_size) {
		uint64_t len = state->base + state->raw_size - begin;
		uint64_t len_first, len_secnd;
		void *ptr = *buf;

		// For small sizes, we just go with the prealloc'ed buffer.
		if (e_size > page_size) {
			ptr = realloc(*buf, e_size);
			if (!ptr) {
				ptr = *buf;
				trunc = 1;
			} else {
				*buf = ptr;
			}
		}

		len_first = trunc ? (len <= page_size ? len : page_size) : len;
		memcpy(ptr, begin, len_first);
		len_secnd = trunc ? (page_size - len_first) : e_size - len;
		memcpy(ptr + len_first, state->base, len_secnd);
		e = ptr;
	}

	*msg = e;
	if (e->header.type == PERF_RECORD_SAMPLE) {
		*sample = e;
	} else if (e->header.type == PERF_RECORD_LOST) {
		*lost = e;
	}

	return 1 + trunc;
}
*/
import "C"

import (
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"path"
	"reflect"
	"runtime"
	"time"
	"unsafe"

	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/spanstat"

	"golang.org/x/sys/unix"
)

const (
	MAX_POLL_EVENTS = 32
)

// PerfEventConfig holds the parameters for opening the per-CPU perf
// events backing a BPF events map.
type PerfEventConfig struct {
	NumCpus      int
	NumPages     int
	MapName      string
	Type         int
	Config       int
	SampleType   int
	WakeupEvents int
}

// DefaultPerfEventConfig returns the default configuration for reading
// raw samples from the cilium events map, one ring of 8 pages per CPU.
func DefaultPerfEventConfig() *PerfEventConfig {
	return &PerfEventConfig{
		MapName:      EventsMapName,
		Type:         PERF_TYPE_SOFTWARE,
		Config:       PERF_COUNT_SW_BPF_OUTPUT,
		SampleType:   PERF_SAMPLE_RAW,
		WakeupEvents: 1,
		NumCpus:      runtime.NumCPU(),
		NumPages:     8,
	}
}

// PerfEvent wraps one per-CPU perf event and its mmap'ed ring buffer.
type PerfEvent struct {
	cpu      int
	Fd       int
	pagesize int
	npages   int
	lost     uint64
	trunc    uint64
	unknown  uint64
	data     []byte
	// state is placed here to reduce memory allocations
	state unsafe.Pointer
	// buf is placed here to reduce memory allocations
	buf unsafe.Pointer
}

// PerfEventHeader must match 'struct perf_event_header' in <linux/perf_event.h>.
type PerfEventHeader struct {
	Type      uint32
	Misc      uint16
	TotalSize uint16
}

// PerfEventSample must match 'struct event_sample' above, i.e. a
// PERF_RECORD_SAMPLE record carrying PERF_SAMPLE_RAW data.
type PerfEventSample struct {
	PerfEventHeader
	Size uint32
	data byte // Size bytes of data
}

// PerfEventLost must match the PERF_RECORD_LOST record layout in
// <linux/perf_event.h>.
type PerfEventLost struct {
	PerfEventHeader
	Id   uint64
	Lost uint64
}

// DataDirect returns the sample payload as a slice aliasing the ring
// buffer memory. The slice is only valid until the next read advances
// the ring; use DataCopy to retain the data.
func (e *PerfEventSample) DataDirect() []byte {
	// http://stackoverflow.com/questions/27532523/how-to-convert-1024c-char-to-1024byte
	return (*[1 << 30]byte)(unsafe.Pointer(&e.data))[:int(e.Size):int(e.Size)]
}

// DataCopy returns the sample payload as a freshly allocated Go slice.
func (e *PerfEventSample) DataCopy() []byte {
	return C.GoBytes(unsafe.Pointer(&e.data), C.int(e.Size))
}

// ReceiveFunc is run for each sample read from the perf ring buffer.
type ReceiveFunc func(msg *PerfEventSample, cpu int)

// LostFunc is run for each lost-event record read from the perf ring buffer.
type LostFunc func(msg *PerfEventLost, cpu int)

// ErrorFunc is run when reading PerfEvent results in an error
type ErrorFunc func(msg *PerfEvent)

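// PerfEventOpen wraps the perf_event_open(2) syscall: it opens one perf
// event described by config for the given pid/cpu, optionally attached
// to groupFD, and returns it with its file descriptor set.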
func PerfEventOpen(config *PerfEventConfig, pid int, cpu int, groupFD int, flags int) (*PerfEvent, error) {
	attr := C.struct_perf_event_attr{}

	C.create_perf_event_attr(
		C.int(config.Type),
		C.int(config.Config),
		C.int(config.SampleType),
		C.int(config.WakeupEvents),
		unsafe.Pointer(&attr),
	)

	var duration *spanstat.SpanStat
	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
		duration = spanstat.Start()
	}
	ret, _, err := unix.Syscall6(
		unix.SYS_PERF_EVENT_OPEN,
		uintptr(unsafe.Pointer(&attr)),
		uintptr(pid),
		uintptr(cpu),
		uintptr(groupFD),
		uintptr(flags), 0)
	runtime.KeepAlive(&attr)
	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
		metrics.BPFSyscallDuration.WithLabelValues(metricOpPerfEventOpen, metrics.Errno2Outcome(err)).Observe(duration.EndError(err).Total().Seconds())
	}

	if int(ret) > 0 && err == 0 {
		return &PerfEvent{
			cpu: cpu,
			Fd:  int(ret),
		}, nil
	}
	return nil, fmt.Errorf("Unable to open perf event: %s", err)
}

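// Mmap maps npages+1 pages of the perf event: one leading metadata page
// (struct perf_event_mmap_page) followed by the data ring, whose total
// size must be a power of two.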
func (e *PerfEvent) Mmap(pagesize int, npages int) error {
	datasize := uint32(pagesize) * uint32(npages)
	if (datasize & (datasize - 1)) != 0 {
		return fmt.Errorf("Unable to mmap perf event: ring size not power of 2")
	}

	size := pagesize * (npages + 1)
	data, err := unix.Mmap(e.Fd,
		0,
		size,
		unix.PROT_READ|unix.PROT_WRITE,
		unix.MAP_SHARED)
	if err != nil {
		return fmt.Errorf("Unable to mmap perf event: %s", err)
	}

	e.pagesize = pagesize
	e.npages = npages
	e.data = data

	return nil
}

func (e *PerfEvent) Munmap() error {
	return unix.Munmap(e.data)
}

// allocateBuffers initializes the buffers for sharing between Golang and C.
func (e *PerfEvent) allocateBuffers() {
	// C.malloc() will crash the program if allocation fails, skip check:
	// https://golang.org/cmd/cgo/
	e.state = C.malloc(C.size_t(unsafe.Sizeof(C.struct_read_state{})))
	e.buf = C.malloc(C.size_t(e.pagesize))
}

func (e *PerfEvent) freeBuffers() {
	C.free(e.buf)
	C.free(e.state)
}

// Enable allocates the scratch buffers shared with C and starts the perf
// event via PERF_EVENT_IOC_ENABLE.
func (e *PerfEvent) Enable() error {
	e.allocateBuffers()
	var duration *spanstat.SpanStat
	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
		duration = spanstat.Start()
	}
	err := unix.IoctlSetInt(e.Fd, unix.PERF_EVENT_IOC_ENABLE, 0)
	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
		metrics.BPFSyscallDuration.WithLabelValues(metricOpPerfEventEnable, metrics.Error2Outcome(err)).Observe(duration.EndError(err).Total().Seconds())
	}
	if err != nil {
		e.freeBuffers()
		return fmt.Errorf("Unable to enable perf event: %v", err)
	}

	return nil
}

// Disable stops the perf event via PERF_EVENT_IOC_DISABLE and frees the
// scratch buffers.
func (e *PerfEvent) Disable() error {
	var ret error

	if e == nil {
		return nil
	}

	// The ioctl does not fail in perf's kernel-side handler, but even
	// if it did, there would not be much we could do about it here ...
	ret = nil
	var duration *spanstat.SpanStat
	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
		duration = spanstat.Start()
	}
	err := unix.IoctlSetInt(e.Fd, unix.PERF_EVENT_IOC_DISABLE, 0)
	if option.Config.MetricsConfig.BPFSyscallDurationEnabled {
		metrics.BPFSyscallDuration.WithLabelValues(metricOpPerfEventDisable, metrics.Error2Outcome(err)).Observe(duration.EndError(err).Total().Seconds())
	}
	if err != nil {
		ret = fmt.Errorf("Unable to disable perf event: %v", err)
	}

	e.freeBuffers()
	return ret
}

// Read attempts to read all events from the perf event buffer, calling one of
// the receive / lost functions for each event. receive is called when the
// event is a valid sample; lostFn is called when the kernel has attempted to
// write an event into the ringbuffer but ran out of space for the event.
//
// If all events are not read within 20 seconds, errFn is called and reading
// stops.
func (e *PerfEvent) Read(receive ReceiveFunc, lostFn LostFunc, errFn ErrorFunc) {
	// Prepare for reading and check if events are available
	available := C.perf_event_read_init(C.int(e.npages), C.int(e.pagesize),
		unsafe.Pointer(&e.data[0]), unsafe.Pointer(e.state))

	// Poll false positive
	if available == 0 {
		return
	}

	timer := time.After(20 * time.Second)
read:
	for {
		var (
			msg    *PerfEventHeader
			sample *PerfEventSample
			lost   *PerfEventLost
			ok     C.int
		)

		// Storing the C pointer to the temporary wrapper buffer on the
		// stack allows CGo to understand it better when passing into
		// perf_event_read(), to avoid the following error:
		//
		// runtime error: cgo argument has Go pointer to Go pointer
		//
		// We MUST store it back to 'e' in case it was reallocated.
		wrapBuf := e.buf
		if ok = C.perf_event_read(C.int(e.pagesize),
			unsafe.Pointer(&e.data[0]), unsafe.Pointer(e.state),
			unsafe.Pointer(&wrapBuf), unsafe.Pointer(&msg),
			unsafe.Pointer(&sample), unsafe.Pointer(&lost)); ok == 0 {
			e.buf = wrapBuf
			break
		}
		e.buf = wrapBuf

		if ok == 2 {
			e.trunc++
		}
		if msg.Type == C.PERF_RECORD_SAMPLE {
			receive(sample, e.cpu)
		} else if msg.Type == C.PERF_RECORD_LOST {
			e.lost += lost.Lost
			if lostFn != nil {
				lostFn(lost, e.cpu)
			}
		} else {
			e.unknown++
		}

		select {
		case <-timer:
			errFn(e)
			C.perf_event_reset_tail(unsafe.Pointer(&e.data[0]))
			break read
		default:
		}
	}
}

// Close closes the perf event file descriptor.
func (e *PerfEvent) Close() {
	if e == nil {
		return
	}

	unix.Close(e.Fd)
}

// Debug returns a string with internal information about PerfEvent
func (e *PerfEvent) Debug() string {
	return fmt.Sprintf("cpu: %d, Fd: %d, pagesize: %d, npages: %d, lost: %d, trunc: %d, unknown: %d, state: %v", e.cpu, e.Fd, e.pagesize, e.npages, e.lost, e.trunc, e.unknown, C.GoBytes(e.state, C.sizeof_struct_read_state))
}

// DebugDump returns Debug() plus a dump of the raw ring buffer contents.
func (e *PerfEvent) DebugDump() string {
	return fmt.Sprintf("%s, data: %v", e.Debug(), e.data)
}

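// EPoll wraps an epoll instance used to wait for readable perf event
// file descriptors; up to MAX_POLL_EVENTS fds are reported per poll.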
type EPoll struct {
	fd     int
	nfds   int
	events [MAX_POLL_EVENTS]unix.EpollEvent
}

// AddFD registers fd with the epoll instance for the given event mask.
func (ep *EPoll) AddFD(fd int, events uint32) error {
	ev := unix.EpollEvent{
		Events: events,
		Fd:     int32(fd),
	}

	return unix.EpollCtl(ep.fd, unix.EPOLL_CTL_ADD, fd, &ev)
}

// Poll blocks for up to timeout milliseconds and returns the number of
// file descriptors that became ready.
func (ep *EPoll) Poll(timeout int) (int, error) {
	nfds, err := unix.EpollWait(ep.fd, ep.events[0:], timeout)
	if err != nil {
		return 0, err
	}

	ep.nfds = nfds

	return nfds, nil
}

func (ep *EPoll) Close() {
	if ep.fd > 0 {
		unix.Close(ep.fd)
	}
}

type EventMap struct {
	fd int
}

func openMap(path string) (*EventMap, error) {
	fd, err := ObjGet(path)
	if err != nil {
		return nil, err
	}

	return &EventMap{
		fd: fd,
	}, nil
}

func (e *EventMap) Update(fd int, ubaPtr unsafe.Pointer, sizeOf uintptr) error {
	return UpdateElementFromPointers(e.fd, ubaPtr, sizeOf)
}

func (e *EventMap) Close() {
	if e == nil {
		return
	}

	unix.Close(e.fd)
}

type PerCpuEvents struct {
	Cpus     int
	Npages   int
	Pagesize int
	eventMap *EventMap
	event    map[int]*PerfEvent
	poll     EPoll
}

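// NewPerCpuEvents opens one perf event per CPU, mmaps each ring buffer,
// enables the events and publishes their fds in the events BPF map via
// Unmute().
//
// A minimal consumer loop might look like this (sketch only; onSample,
// onLost and onError are hypothetical callbacks, error handling elided):
//
//	events, _ := NewPerCpuEvents(DefaultPerfEventConfig())
//	for {
//		if ready, _ := events.Poll(5000); ready > 0 {
//			events.ReadAll(onSample, onLost, onError)
//		}
//	}
//	events.CloseAll()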
func NewPerCpuEvents(config *PerfEventConfig) (*PerCpuEvents, error) {
	var err error

	e := &PerCpuEvents{
		Cpus:     config.NumCpus,
		Npages:   config.NumPages,
		Pagesize: os.Getpagesize(),
		event:    make(map[int]*PerfEvent),
	}

	defer func() {
		if err != nil {
			e.CloseAll()
		}
	}()

	e.poll.fd, err = unix.EpollCreate1(0)
	if err != nil {
		return nil, err
	}

	mapPath := config.MapName
	if !path.IsAbs(mapPath) {
		mapPath = MapPath(mapPath)
	}

	e.eventMap, err = openMap(mapPath)
	if err != nil {
		return nil, err
	}

	for cpu := 0; cpu < e.Cpus; cpu++ {
		var event *PerfEvent

		// Assign to the enclosing 'err' (no ':=') so that the deferred
		// CloseAll() above also runs when opening an event fails.
		event, err = PerfEventOpen(config, -1, cpu, -1, 0)
		if err != nil {
			return nil, err
		}
		e.event[event.Fd] = event

		if err = e.poll.AddFD(event.Fd, unix.EPOLLIN); err != nil {
			return nil, err
		}

		if err = event.Mmap(e.Pagesize, e.Npages); err != nil {
			return nil, err
		}

		if err = event.Enable(); err != nil {
			return nil, err
		}
	}

	if err = e.Unmute(); err != nil {
		return nil, err
	}

	return e, nil
}

// Mute removes the perf event fd(s) from the perf event BPF map. This
// has the effect that no new events are pushed into the ring buffer.
func (e *PerCpuEvents) Mute() {
	for _, event := range e.event {
		DeleteElement(e.eventMap.fd, unsafe.Pointer(&event.cpu))
	}
}

// Unmute adds the perf event fd(s) to the perf event BPF map. This
// has the effect that events can now be pushed into the ring buffer.
func (e *PerCpuEvents) Unmute() error {
	uba := bpfAttrMapOpElem{
		mapFd: uint32(e.eventMap.fd),
		flags: uint64(0),
	}
	ubaPtr := unsafe.Pointer(&uba)
	ubaSizeOf := unsafe.Sizeof(uba)

	for _, event := range e.event {
		// FIXME: Not sure what to do if this fails part-way: earlier
		// map entries have already been updated and we can't fully
		// restore the previous state.
		uba.key = uint64(uintptr(unsafe.Pointer(&event.cpu)))
		uba.value = uint64(uintptr(unsafe.Pointer(&event.Fd)))
		if err := e.eventMap.Update(e.eventMap.fd, ubaPtr, ubaSizeOf); err != nil {
			return err
		}
	}

	return nil
}

func (e *PerCpuEvents) Poll(timeout int) (int, error) {
	return e.poll.Poll(timeout)
}

// ReadAll drains the ring buffers of all per-CPU events that the last
// Poll() reported as ready, dispatching each record to the given callbacks.
func (e *PerCpuEvents) ReadAll(receive ReceiveFunc, lost LostFunc, handleError ErrorFunc) error {
	for i := 0; i < e.poll.nfds; i++ {
		fd := int(e.poll.events[i].Fd)
		if event, ok := e.event[fd]; ok {
			event.Read(receive, lost, handleError)
		}
	}

	return nil
}

// Stats returns the cumulative number of lost, truncated and unknown
// events across all per-CPU rings.
func (e *PerCpuEvents) Stats() (uint64, uint64, uint64) {
	var lost, trunc, unknown uint64

	for _, event := range e.event {
		lost += event.lost
		trunc += event.trunc
		unknown += event.unknown
	}

	return lost, trunc, unknown
}

// CloseAll disables, unmaps and closes all per-CPU events, the event map
// and the epoll instance, returning the last Disable() error, if any.
func (e *PerCpuEvents) CloseAll() error {
	var retErr error

	e.eventMap.Close()
	e.poll.Close()

	for _, event := range e.event {
		if err := event.Disable(); err != nil {
			retErr = err
		}

		event.Munmap()
		event.Close()
	}

	return retErr
}

// decode uses reflection to read bytes directly from 'reader' into the object
// pointed to from 'i'. Assumes that 'i' is a pointer.
//
// This function should not be used from performance-sensitive code.
func decode(i interface{}, reader io.ReadSeeker) error {
	value := reflect.ValueOf(i).Elem()
	// value.Type(), not reflect.TypeOf(value): the latter would yield
	// the size of reflect.Value itself rather than the decoded struct.
	decodeSize := int64(value.Type().Size())
	if _, err := reader.Seek(decodeSize, io.SeekStart); err != nil {
		return fmt.Errorf("failed to seek into reader %d bytes", decodeSize)
	}
	_, _ = reader.Seek(0, io.SeekStart)

	for i := 0; i < value.NumField(); i++ {
		if err := binary.Read(reader, binary.LittleEndian, value.Field(i).Addr().Interface()); err != nil {
			return fmt.Errorf("failed to decode field %d", i)
		}
	}
	return nil
}

// ReadState is a golang reflection of C.struct_read_state{}
type ReadState struct {
	Base     uint64 // Actually a pointer
	RawSize  uint64
	LastSize uint64
}

// Decode populates 'r' based on the bytes read from the specified reader.
//
// This function should not be used from performance-sensitive code.
func (r *ReadState) Decode(reader io.ReadSeeker) error {
	return decode(r, reader)
}
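
// As an illustration (untested sketch, assuming a test context where 'e'
// is an enabled *PerfEvent and the standard "bytes" package is imported),
// the C-side read state can be inspected through this decoder:
//
//	raw := C.GoBytes(e.state, C.sizeof_struct_read_state)
//	var rs ReadState
//	_ = rs.Decode(bytes.NewReader(raw))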

// PerfEventMmapPage reflects the Linux 'struct perf_event_mmap_page'
type PerfEventMmapPage struct {
	Version       uint32 // version number of this structure
	CompatVersion uint32 // lowest version this is compat with

	Lock        uint32 // seqlock for synchronization
	Index       uint32 // hardware event identifier
	Offset      int64  // add to hardware event value
	TimeEnabled uint64 // time event active
	TimeRunning uint64 // time event on cpu
	//union {
	Capabilities uint64
	//struct {
	//	__u64	cap_bit0		: 1, /* Always 0, deprecated, see commit 860f085b74e9 */
	//		cap_bit0_is_deprecated	: 1, /* Always 1, signals that bit 0 is zero */

	//		cap_user_rdpmc		: 1, /* The RDPMC instruction can be used to read counts */
	//		cap_user_time		: 1, /* The time_* fields are used */
	//		cap_user_time_zero	: 1, /* The time_zero field is used */
	//		cap_____res		: 59;
	//};
	//};
	PmcWidth uint16

	TimeShift  uint16
	TimeMult   uint32
	TimeOffset uint64
	TimeZero   uint64
	Size       uint32

	Reserved [118*8 + 4]uint8 // align to 1k.

	DataHead   uint64 // head in the data section
	DataTail   uint64 // user-space written tail
	DataOffset uint64 // where the buffer starts
	DataSize   uint64 // data buffer size

	AuxHead   uint64
	AuxTail   uint64
	AuxOffset uint64
	AuxSize   uint64
}

// Decode populates 'p' based on the bytes read from the specified reader.
//
// This function should not be used from performance-sensitive code.
func (p *PerfEventMmapPage) Decode(reader io.ReadSeeker) error {
	return decode(p, reader)
}

// PerfEventFromMemory creates an in-memory PerfEvent object for testing
// and analysis purposes. No kernel interaction is made.
//
// The caller MUST eventually call Disable() to free event resources.
func PerfEventFromMemory(page *PerfEventMmapPage, buf []byte) *PerfEvent {
	pagesize := os.Getpagesize()
	e := &PerfEvent{
		cpu:      1,
		pagesize: pagesize,
		npages:   int(page.DataSize) / pagesize,
		data:     buf,
	}

	e.allocateBuffers()
	return e
}