github.com/google/cadvisor@v0.49.1/perf/collector_libpfm.go (about)

     1  //go:build libpfm && cgo
     2  // +build libpfm,cgo
     3  
     4  // Copyright 2020 Google Inc. All Rights Reserved.
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Collector of perf events for a container.
    19  package perf
    20  
    21  // #cgo CFLAGS: -I/usr/include
    22  // #cgo LDFLAGS: -lpfm
    23  // #include <perfmon/pfmlib.h>
    24  // #include <stdlib.h>
    25  // #include <string.h>
    26  import "C"
    27  
    28  import (
    29  	"bytes"
    30  	"encoding/binary"
    31  	"fmt"
    32  	"os"
    33  	"sync"
    34  	"unsafe"
    35  
    36  	"golang.org/x/sys/unix"
    37  	"k8s.io/klog/v2"
    38  
    39  	info "github.com/google/cadvisor/info/v1"
    40  	"github.com/google/cadvisor/stats"
    41  )
    42  
// collector gathers core perf event counters for one cgroup and delegates
// uncore events to a separate stats.Collector.
//
// cpuFiles maps a group index to the perf event files opened for that group;
// it is read and modified under cpuFilesLock by setup, UpdateStats and Destroy.
// onlineCPUs lists the CPUs on which every event is opened.
// eventToCustomEvent is filled by mapEventsToCustomEvents and lets an event
// name be resolved to its raw (custom) definition.
type collector struct {
	cgroupPath         string
	events             PerfEvents
	cpuFiles           map[int]group
	cpuFilesLock       sync.Mutex
	onlineCPUs         []int
	eventToCustomEvent map[Event]*CustomEvent
	uncore             stats.Collector

	// Handles for mocking purposes; newCollector points them at
	// unix.PerfEventOpen and unix.IoctlSetInt.
	perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
	ioctlSetInt   func(fd int, req uint, value int) error
}
    56  
// group holds the open perf event files of one event group.
//
// cpuFiles is keyed by event name and then by CPU number. names lists the
// event names in the order they were first added. leaderName is the name of
// the first event added to the group; UpdateStats reads the group through the
// leader's files.
type group struct {
	cpuFiles   map[string]map[int]readerCloser
	names      []string
	leaderName string
}
    62  
// isLibpfmInitialized records whether pfm_initialize succeeded in init and is
// cleared again by Finalize. libpfmMutex is held around both of those updates.
var (
	isLibpfmInitialized = false
	libpfmMutex         = sync.Mutex{}
)
    67  
// groupLeaderFileDescriptor is the group fd that setup assigns to every CPU
// before a group leader has been opened; it is therefore the value passed as
// groupFd when the leader itself is opened. perf_event_open(2) treats a group
// fd of -1 as a request to create a new group.
const (
	groupLeaderFileDescriptor = -1
)
    71  
    72  func init() {
    73  	libpfmMutex.Lock()
    74  	defer libpfmMutex.Unlock()
    75  	pErr := C.pfm_initialize()
    76  	if pErr != C.PFM_SUCCESS {
    77  		klog.Errorf("unable to initialize libpfm: %d", int(pErr))
    78  		return
    79  	}
    80  	isLibpfmInitialized = true
    81  }
    82  
    83  func newCollector(cgroupPath string, events PerfEvents, onlineCPUs []int, cpuToSocket map[int]int) *collector {
    84  	collector := &collector{cgroupPath: cgroupPath, events: events, onlineCPUs: onlineCPUs, cpuFiles: map[int]group{}, uncore: NewUncoreCollector(cgroupPath, events, cpuToSocket), perfEventOpen: unix.PerfEventOpen, ioctlSetInt: unix.IoctlSetInt}
    85  	mapEventsToCustomEvents(collector)
    86  	return collector
    87  }
    88  
    89  func (c *collector) UpdateStats(stats *info.ContainerStats) error {
    90  	err := c.uncore.UpdateStats(stats)
    91  	if err != nil {
    92  		klog.Errorf("Failed to get uncore perf event stats: %v", err)
    93  	}
    94  
    95  	c.cpuFilesLock.Lock()
    96  	defer c.cpuFilesLock.Unlock()
    97  
    98  	stats.PerfStats = []info.PerfStat{}
    99  	klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath)
   100  
   101  	for _, group := range c.cpuFiles {
   102  		for cpu, file := range group.cpuFiles[group.leaderName] {
   103  			stat, err := readGroupPerfStat(file, group, cpu, c.cgroupPath)
   104  			if err != nil {
   105  				klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", group.leaderName, cpu, c.cgroupPath, err.Error())
   106  				continue
   107  			}
   108  
   109  			stats.PerfStats = append(stats.PerfStats, stat...)
   110  		}
   111  	}
   112  
   113  	return nil
   114  }
   115  
   116  func readGroupPerfStat(file readerCloser, group group, cpu int, cgroupPath string) ([]info.PerfStat, error) {
   117  	values, err := getPerfValues(file, group)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	perfStats := make([]info.PerfStat, len(values))
   123  	for i, value := range values {
   124  		klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", value.Name, cpu, cgroupPath, value.Value)
   125  		perfStats[i] = info.PerfStat{
   126  			PerfValue: value,
   127  			Cpu:       cpu,
   128  		}
   129  	}
   130  
   131  	return perfStats, nil
   132  }
   133  
   134  func getPerfValues(file readerCloser, group group) ([]info.PerfValue, error) {
   135  	// 24 bytes of GroupReadFormat struct.
   136  	// 16 bytes of Values struct for each element in group.
   137  	// See https://man7.org/linux/man-pages/man2/perf_event_open.2.html section "Reading results" with PERF_FORMAT_GROUP specified.
   138  	buf := make([]byte, 24+16*len(group.names))
   139  	_, err := file.Read(buf)
   140  	if err != nil {
   141  		return []info.PerfValue{}, fmt.Errorf("unable to read perf event group ( leader = %s ): %w", group.leaderName, err)
   142  	}
   143  	perfData := &GroupReadFormat{}
   144  	reader := bytes.NewReader(buf[:24])
   145  	err = binary.Read(reader, binary.LittleEndian, perfData)
   146  	if err != nil {
   147  		return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group ( leader = %s ): %w", group.leaderName, err)
   148  	}
   149  	values := make([]Values, perfData.Nr)
   150  	reader = bytes.NewReader(buf[24:])
   151  	err = binary.Read(reader, binary.LittleEndian, values)
   152  	if err != nil {
   153  		return []info.PerfValue{}, fmt.Errorf("unable to decode perf event group values ( leader = %s ): %w", group.leaderName, err)
   154  	}
   155  
   156  	scalingRatio := 1.0
   157  	if perfData.TimeRunning != 0 && perfData.TimeEnabled != 0 {
   158  		scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
   159  	}
   160  
   161  	perfValues := make([]info.PerfValue, perfData.Nr)
   162  	if scalingRatio != float64(0) {
   163  		for i, name := range group.names {
   164  			perfValues[i] = info.PerfValue{
   165  				ScalingRatio: scalingRatio,
   166  				Value:        uint64(float64(values[i].Value) / scalingRatio),
   167  				Name:         name,
   168  			}
   169  		}
   170  	} else {
   171  		for i, name := range group.names {
   172  			perfValues[i] = info.PerfValue{
   173  				ScalingRatio: scalingRatio,
   174  				Value:        values[i].Value,
   175  				Name:         name,
   176  			}
   177  		}
   178  	}
   179  
   180  	return perfValues, nil
   181  }
   182  
   183  func (c *collector) setup() error {
   184  	cgroup, err := os.Open(c.cgroupPath)
   185  	if err != nil {
   186  		return fmt.Errorf("unable to open cgroup directory %s: %s", c.cgroupPath, err)
   187  	}
   188  	defer cgroup.Close()
   189  
   190  	c.cpuFilesLock.Lock()
   191  	defer c.cpuFilesLock.Unlock()
   192  	cgroupFd := int(cgroup.Fd())
   193  	groupIndex := 0
   194  	for _, group := range c.events.Core.Events {
   195  		// CPUs file descriptors of group leader needed for perf_event_open.
   196  		leaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
   197  		for _, cpu := range c.onlineCPUs {
   198  			leaderFileDescriptors[cpu] = groupLeaderFileDescriptor
   199  		}
   200  
   201  		leaderFileDescriptors, err := c.createLeaderFileDescriptors(group.events, cgroupFd, groupIndex, leaderFileDescriptors)
   202  		if err != nil {
   203  			klog.Errorf("Cannot count perf event group %v: %v", group.events, err)
   204  			c.deleteGroup(groupIndex)
   205  			continue
   206  		} else {
   207  			groupIndex++
   208  		}
   209  
   210  		// Group is prepared so we should reset and enable counting.
   211  		for _, fd := range leaderFileDescriptors {
   212  			err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
   213  			if err != nil {
   214  				return err
   215  			}
   216  			err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
   217  			if err != nil {
   218  				return err
   219  			}
   220  		}
   221  	}
   222  
   223  	return nil
   224  }
   225  
   226  func (c *collector) createLeaderFileDescriptors(events []Event, cgroupFd int, groupIndex int, leaderFileDescriptors map[int]int) (map[int]int, error) {
   227  	for j, event := range events {
   228  		// First element is group leader.
   229  		isGroupLeader := j == 0
   230  		customEvent, ok := c.eventToCustomEvent[event]
   231  		var err error
   232  		if ok {
   233  			config := c.createConfigFromRawEvent(customEvent)
   234  			leaderFileDescriptors, err = c.registerEvent(eventInfo{string(customEvent.Name), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
   235  			if err != nil {
   236  				return nil, fmt.Errorf("cannot register perf event: %v", err)
   237  			}
   238  		} else {
   239  			config, err := c.createConfigFromEvent(event)
   240  			if err != nil {
   241  				return nil, fmt.Errorf("cannot create config from perf event: %v", err)
   242  
   243  			}
   244  			leaderFileDescriptors, err = c.registerEvent(eventInfo{string(event), config, cgroupFd, groupIndex, isGroupLeader}, leaderFileDescriptors)
   245  			if err != nil {
   246  				return nil, fmt.Errorf("cannot register perf event: %v", err)
   247  			}
   248  			// Clean memory allocated by C code.
   249  			C.free(unsafe.Pointer(config))
   250  		}
   251  	}
   252  	return leaderFileDescriptors, nil
   253  }
   254  
   255  func readPerfEventAttr(name string, pfmGetOsEventEncoding func(string, unsafe.Pointer) error) (*unix.PerfEventAttr, error) {
   256  	perfEventAttrMemory := C.malloc(C.size_t(unsafe.Sizeof(unix.PerfEventAttr{})))
   257  	// Fill memory with 0 values.
   258  	C.memset(perfEventAttrMemory, 0, C.size_t(unsafe.Sizeof(unix.PerfEventAttr{})))
   259  	err := pfmGetOsEventEncoding(name, unsafe.Pointer(perfEventAttrMemory))
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  	return (*unix.PerfEventAttr)(perfEventAttrMemory), nil
   264  }
   265  
   266  func pfmGetOsEventEncoding(name string, perfEventAttrMemory unsafe.Pointer) error {
   267  	event := pfmPerfEncodeArgT{}
   268  	fstr := C.CString("")
   269  	defer C.free(unsafe.Pointer(fstr))
   270  	event.fstr = unsafe.Pointer(fstr)
   271  	event.attr = perfEventAttrMemory
   272  	event.size = C.size_t(unsafe.Sizeof(event))
   273  	cSafeName := C.CString(name)
   274  	defer C.free(unsafe.Pointer(cSafeName))
   275  	pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
   276  	if pErr != C.PFM_SUCCESS {
   277  		return fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr))
   278  	}
   279  	return nil
   280  }
   281  
// eventInfo bundles what registerEvent needs to open one perf event.
//
// name labels the opened files and is the key under which they are stored.
// config is the perf_event_attr passed to perf_event_open. pid is used only
// for a group leader, together with PERF_FLAG_PID_CGROUP; callers in this
// file pass the cgroup directory's file descriptor. groupIndex selects the
// group the event belongs to, and isGroupLeader marks the group's first event.
//
// The struct is built with positional literals, so field order matters.
type eventInfo struct {
	name          string
	config        *unix.PerfEventAttr
	pid           int
	groupIndex    int
	isGroupLeader bool
}
   289  
   290  func (c *collector) registerEvent(event eventInfo, leaderFileDescriptors map[int]int) (map[int]int, error) {
   291  	newLeaderFileDescriptors := make(map[int]int, len(c.onlineCPUs))
   292  	var pid, flags int
   293  	if event.isGroupLeader {
   294  		pid = event.pid
   295  		flags = unix.PERF_FLAG_FD_CLOEXEC | unix.PERF_FLAG_PID_CGROUP
   296  	} else {
   297  		pid = -1
   298  		flags = unix.PERF_FLAG_FD_CLOEXEC
   299  	}
   300  
   301  	setAttributes(event.config, event.isGroupLeader)
   302  
   303  	for _, cpu := range c.onlineCPUs {
   304  		fd, err := c.perfEventOpen(event.config, pid, cpu, leaderFileDescriptors[cpu], flags)
   305  		if err != nil {
   306  			return leaderFileDescriptors, fmt.Errorf("setting up perf event %#v failed: %q", event.config, err)
   307  		}
   308  		perfFile := os.NewFile(uintptr(fd), event.name)
   309  		if perfFile == nil {
   310  			return leaderFileDescriptors, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
   311  		}
   312  
   313  		c.addEventFile(event.groupIndex, event.name, cpu, perfFile)
   314  
   315  		// If group leader, save fd for others.
   316  		if event.isGroupLeader {
   317  			newLeaderFileDescriptors[cpu] = fd
   318  		}
   319  	}
   320  
   321  	if event.isGroupLeader {
   322  		return newLeaderFileDescriptors, nil
   323  	}
   324  	return leaderFileDescriptors, nil
   325  }
   326  
   327  func (c *collector) addEventFile(index int, name string, cpu int, perfFile *os.File) {
   328  	_, ok := c.cpuFiles[index]
   329  	if !ok {
   330  		c.cpuFiles[index] = group{
   331  			leaderName: name,
   332  			cpuFiles:   map[string]map[int]readerCloser{},
   333  		}
   334  	}
   335  
   336  	_, ok = c.cpuFiles[index].cpuFiles[name]
   337  	if !ok {
   338  		c.cpuFiles[index].cpuFiles[name] = map[int]readerCloser{}
   339  	}
   340  
   341  	c.cpuFiles[index].cpuFiles[name][cpu] = perfFile
   342  
   343  	// Check if name is already stored.
   344  	for _, have := range c.cpuFiles[index].names {
   345  		if name == have {
   346  			return
   347  		}
   348  	}
   349  
   350  	// Otherwise save it.
   351  	c.cpuFiles[index] = group{
   352  		cpuFiles:   c.cpuFiles[index].cpuFiles,
   353  		names:      append(c.cpuFiles[index].names, name),
   354  		leaderName: c.cpuFiles[index].leaderName,
   355  	}
   356  }
   357  
   358  func (c *collector) deleteGroup(index int) {
   359  	for name, files := range c.cpuFiles[index].cpuFiles {
   360  		for cpu, file := range files {
   361  			klog.V(5).Infof("Closing perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
   362  			err := file.Close()
   363  			if err != nil {
   364  				klog.Warningf("Unable to close perf event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
   365  			}
   366  		}
   367  	}
   368  	delete(c.cpuFiles, index)
   369  }
   370  
   371  func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
   372  	length := len(event.Config)
   373  
   374  	config := &unix.PerfEventAttr{
   375  		Type:   event.Type,
   376  		Config: event.Config[0],
   377  	}
   378  	if length >= 2 {
   379  		config.Ext1 = event.Config[1]
   380  	}
   381  	if length == 3 {
   382  		config.Ext2 = event.Config[2]
   383  	}
   384  
   385  	klog.V(5).Infof("perf_event_attr struct prepared: %#v", config)
   386  	return config
   387  }
   388  
   389  func setAttributes(config *unix.PerfEventAttr, leader bool) {
   390  	config.Sample_type = unix.PERF_SAMPLE_IDENTIFIER
   391  	config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_GROUP | unix.PERF_FORMAT_ID
   392  	config.Bits = unix.PerfBitInherit
   393  
   394  	// Group leader should have this flag set to disable counting until all group would be prepared.
   395  	if leader {
   396  		config.Bits |= unix.PerfBitDisabled
   397  	}
   398  
   399  	config.Size = uint32(unsafe.Sizeof(unix.PerfEventAttr{}))
   400  }
   401  
   402  func (c *collector) Destroy() {
   403  	c.uncore.Destroy()
   404  	c.cpuFilesLock.Lock()
   405  	defer c.cpuFilesLock.Unlock()
   406  
   407  	for i := range c.cpuFiles {
   408  		c.deleteGroup(i)
   409  	}
   410  }
   411  
   412  // Finalize terminates libpfm4 to free resources.
   413  func Finalize() {
   414  	libpfmMutex.Lock()
   415  	defer libpfmMutex.Unlock()
   416  
   417  	klog.V(1).Info("Attempting to terminate libpfm4")
   418  	if !isLibpfmInitialized {
   419  		klog.V(1).Info("libpfm4 has not been initialized; not terminating.")
   420  		return
   421  	}
   422  
   423  	C.pfm_terminate()
   424  	isLibpfmInitialized = false
   425  }
   426  
   427  func mapEventsToCustomEvents(collector *collector) {
   428  	collector.eventToCustomEvent = map[Event]*CustomEvent{}
   429  	for key, event := range collector.events.Core.CustomEvents {
   430  		collector.eventToCustomEvent[event.Name] = &collector.events.Core.CustomEvents[key]
   431  	}
   432  }
   433  
   434  func (c *collector) createConfigFromRawEvent(event *CustomEvent) *unix.PerfEventAttr {
   435  	klog.V(5).Infof("Setting up raw perf event %#v", event)
   436  
   437  	config := createPerfEventAttr(*event)
   438  
   439  	klog.V(5).Infof("perf_event_attr: %#v", config)
   440  
   441  	return config
   442  }
   443  
   444  func (c *collector) createConfigFromEvent(event Event) (*unix.PerfEventAttr, error) {
   445  	klog.V(5).Infof("Setting up perf event %s", string(event))
   446  
   447  	config, err := readPerfEventAttr(string(event), pfmGetOsEventEncoding)
   448  	if err != nil {
   449  		C.free((unsafe.Pointer)(config))
   450  		return nil, err
   451  	}
   452  
   453  	klog.V(5).Infof("perf_event_attr: %#v", config)
   454  
   455  	return config, nil
   456  }