github.com/google/cadvisor@v0.49.1/perf/uncore_libpfm.go (about)

     1  //go:build libpfm && cgo
     2  // +build libpfm,cgo
     3  
     4  // Copyright 2020 Google Inc. All Rights Reserved.
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Uncore perf events logic.
    19  package perf
    20  
    21  // #cgo CFLAGS: -I/usr/include
    22  // #cgo LDFLAGS: -lpfm
    23  // #include <perfmon/pfmlib.h>
    24  // #include <stdlib.h>
    25  import "C"
    26  import (
    27  	"fmt"
    28  	"os"
    29  	"path/filepath"
    30  	"reflect"
    31  	"regexp"
    32  	"strconv"
    33  	"strings"
    34  	"sync"
    35  	"unsafe"
    36  
    37  	"golang.org/x/sys/unix"
    38  	"k8s.io/klog/v2"
    39  
    40  	info "github.com/google/cadvisor/info/v1"
    41  	"github.com/google/cadvisor/stats"
    42  )
    43  
// pmu describes one uncore PMU as read from its device directory.
type pmu struct {
	name   string   // Name of the PMU's device directory.
	typeOf uint32   // Numeric value parsed from the PMU's "type" file.
	cpus   []uint32 // CPU numbers parsed from the PMU's "cpumask" file.
}
    49  
// Constants used for uncore PMU discovery and event registration:
//   - uncorePMUPrefix: directory name prefix that marks a device as an uncore PMU.
//   - pmuTypeFilename / pmuCpumaskFilename: files read from each PMU directory.
//   - systemDevicesPath: devices path handed to setup by NewUncoreCollector.
//   - rootPerfEventPath: the only cgroup path for which NewUncoreCollector
//     returns a real collector.
//   - uncorePID: pid value placed in the eventInfo used to open uncore events.
const (
	uncorePMUPrefix    = "uncore"
	pmuTypeFilename    = "type"
	pmuCpumaskFilename = "cpumask"
	systemDevicesPath  = "/sys/devices"
	rootPerfEventPath  = "/sys/fs/cgroup/perf_event"
	uncorePID          = -1
)
    58  
    59  func getPMU(pmus uncorePMUs, gotType uint32) (*pmu, error) {
    60  	for _, pmu := range pmus {
    61  		if pmu.typeOf == gotType {
    62  			return &pmu, nil
    63  		}
    64  	}
    65  
    66  	return nil, fmt.Errorf("there is no pmu with event type: %#v", gotType)
    67  }
    68  
// uncorePMUs maps a PMU name to its description.
type uncorePMUs map[string]pmu
    70  
    71  func readUncorePMU(path string, name string, cpumaskRegexp *regexp.Regexp) (*pmu, error) {
    72  	buf, err := os.ReadFile(filepath.Join(path, pmuTypeFilename))
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  	typeString := strings.TrimSpace(string(buf))
    77  	eventType, err := strconv.ParseUint(typeString, 0, 32)
    78  	if err != nil {
    79  		return nil, err
    80  	}
    81  
    82  	buf, err = os.ReadFile(filepath.Join(path, pmuCpumaskFilename))
    83  	if err != nil {
    84  		return nil, err
    85  	}
    86  	var cpus []uint32
    87  	cpumask := strings.TrimSpace(string(buf))
    88  	for _, cpu := range cpumaskRegexp.Split(cpumask, -1) {
    89  		parsedCPU, err := strconv.ParseUint(cpu, 0, 32)
    90  		if err != nil {
    91  			return nil, err
    92  		}
    93  		cpus = append(cpus, uint32(parsedCPU))
    94  	}
    95  
    96  	return &pmu{name: name, typeOf: uint32(eventType), cpus: cpus}, nil
    97  }
    98  
    99  func getUncorePMUs(devicesPath string) (uncorePMUs, error) {
   100  	pmus := make(uncorePMUs)
   101  
   102  	// Depends on platform, cpu mask could be for example in form "0-1" or "0,1".
   103  	cpumaskRegexp := regexp.MustCompile("[-,\n]")
   104  	err := filepath.Walk(devicesPath, func(path string, info os.FileInfo, err error) error {
   105  		// Skip root path.
   106  		if path == devicesPath {
   107  			return nil
   108  		}
   109  		if info.IsDir() {
   110  			if strings.HasPrefix(info.Name(), uncorePMUPrefix) {
   111  				pmu, err := readUncorePMU(path, info.Name(), cpumaskRegexp)
   112  				if err != nil {
   113  					return err
   114  				}
   115  				pmus[info.Name()] = *pmu
   116  			}
   117  		}
   118  		return nil
   119  	})
   120  	if err != nil {
   121  		return nil, err
   122  	}
   123  
   124  	return pmus, nil
   125  }
   126  
// uncoreCollector holds the opened uncore perf event files and the data
// needed to read them.
//
// cpuFiles is indexed by group index and then by PMU name; setup and Destroy
// hold cpuFilesLock while changing it. events and eventToCustomEvent are
// filled by setup from the uncore part of the perf events configuration.
// cpuToSocket is used to look up the socket reported with each stat.
type uncoreCollector struct {
	cpuFilesLock       sync.Mutex
	cpuFiles           map[int]map[string]group
	events             []Group
	eventToCustomEvent map[Event]*CustomEvent
	cpuToSocket        map[int]int

	// Handle for mocking purposes.
	perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
	ioctlSetInt   func(fd int, req uint, value int) error
}
   138  
   139  func NewUncoreCollector(cgroupPath string, events PerfEvents, cpuToSocket map[int]int) stats.Collector {
   140  
   141  	if cgroupPath != rootPerfEventPath {
   142  		// Uncore metric doesn't exists for cgroups, only for entire platform.
   143  		return &stats.NoopCollector{}
   144  	}
   145  
   146  	collector := &uncoreCollector{
   147  		cpuToSocket:   cpuToSocket,
   148  		perfEventOpen: unix.PerfEventOpen,
   149  		ioctlSetInt:   unix.IoctlSetInt,
   150  	}
   151  
   152  	err := collector.setup(events, systemDevicesPath)
   153  	if err != nil {
   154  		klog.Errorf("Perf uncore metrics will not be available: unable to setup uncore perf event collector: %v", err)
   155  		return &stats.NoopCollector{}
   156  	}
   157  
   158  	return collector
   159  }
   160  
   161  func (c *uncoreCollector) createLeaderFileDescriptors(events []Event, groupIndex int, groupPMUs map[Event]uncorePMUs,
   162  	leaderFileDescriptors map[string]map[uint32]int) (map[string]map[uint32]int, error) {
   163  	var err error
   164  	for _, event := range events {
   165  		eventName, _ := parseEventName(string(event))
   166  		customEvent, ok := c.eventToCustomEvent[event]
   167  		if ok {
   168  			err = c.setupRawEvent(customEvent, groupPMUs[event], groupIndex, leaderFileDescriptors)
   169  		} else {
   170  			err = c.setupEvent(eventName, groupPMUs[event], groupIndex, leaderFileDescriptors)
   171  		}
   172  		if err != nil {
   173  			break
   174  		}
   175  	}
   176  	if err != nil {
   177  		c.deleteGroup(groupIndex)
   178  		return nil, fmt.Errorf("cannot create config from perf event: %v", err)
   179  	}
   180  	return leaderFileDescriptors, nil
   181  }
   182  
   183  func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
   184  	readUncorePMUs, err := getUncorePMUs(devicesPath)
   185  	if err != nil {
   186  		return err
   187  	}
   188  
   189  	c.cpuFiles = make(map[int]map[string]group)
   190  	c.events = events.Uncore.Events
   191  	c.eventToCustomEvent = parseUncoreEvents(events.Uncore)
   192  	c.cpuFilesLock.Lock()
   193  	defer c.cpuFilesLock.Unlock()
   194  
   195  	for i, group := range c.events {
   196  		// Check what PMUs are needed.
   197  		groupPMUs, err := parsePMUs(group, readUncorePMUs, c.eventToCustomEvent)
   198  		if err != nil {
   199  			return err
   200  		}
   201  
   202  		err = checkGroup(group, groupPMUs)
   203  		if err != nil {
   204  			return err
   205  		}
   206  
   207  		// CPUs file descriptors of group leader needed for perf_event_open.
   208  		leaderFileDescriptors := make(map[string]map[uint32]int)
   209  		for _, pmu := range readUncorePMUs {
   210  			leaderFileDescriptors[pmu.name] = make(map[uint32]int)
   211  			for _, cpu := range pmu.cpus {
   212  				leaderFileDescriptors[pmu.name][cpu] = groupLeaderFileDescriptor
   213  			}
   214  		}
   215  		leaderFileDescriptors, err = c.createLeaderFileDescriptors(group.events, i, groupPMUs, leaderFileDescriptors)
   216  		if err != nil {
   217  			klog.Error(err)
   218  			continue
   219  		}
   220  		// Group is prepared so we should reset and enable counting.
   221  		for _, pmuCPUs := range leaderFileDescriptors {
   222  			for _, fd := range pmuCPUs {
   223  				// Call only for used PMUs.
   224  				if fd != groupLeaderFileDescriptor {
   225  					err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_RESET, 0)
   226  					if err != nil {
   227  						return err
   228  					}
   229  					err = c.ioctlSetInt(fd, unix.PERF_EVENT_IOC_ENABLE, 0)
   230  					if err != nil {
   231  						return err
   232  					}
   233  				}
   234  			}
   235  		}
   236  	}
   237  
   238  	return nil
   239  }
   240  
   241  func checkGroup(group Group, eventPMUs map[Event]uncorePMUs) error {
   242  	if group.array {
   243  		var pmu uncorePMUs
   244  		for _, event := range group.events {
   245  			if len(eventPMUs[event]) > 1 {
   246  				return fmt.Errorf("the events in group usually have to be from single PMU, try reorganizing the \"%v\" group", group.events)
   247  			}
   248  			if len(eventPMUs[event]) == 1 {
   249  				if pmu == nil {
   250  					pmu = eventPMUs[event]
   251  					continue
   252  				}
   253  
   254  				eq := reflect.DeepEqual(pmu, eventPMUs[event])
   255  				if !eq {
   256  					return fmt.Errorf("the events in group usually have to be from the same PMU, try reorganizing the \"%v\" group", group.events)
   257  				}
   258  			}
   259  		}
   260  		return nil
   261  	}
   262  	if len(eventPMUs[group.events[0]]) < 1 {
   263  		return fmt.Errorf("the event %q don't have any PMU to count with", group.events[0])
   264  	}
   265  	return nil
   266  }
   267  
   268  func parseEventName(eventName string) (string, string) {
   269  	// First "/" separate pmu prefix and event name
   270  	// ex. "uncore_imc_0/cas_count_read" -> uncore_imc_0 and cas_count_read.
   271  	splittedEvent := strings.SplitN(eventName, "/", 2)
   272  	var pmuPrefix = ""
   273  	if len(splittedEvent) == 2 {
   274  		pmuPrefix = splittedEvent[0]
   275  		eventName = splittedEvent[1]
   276  	}
   277  	return eventName, pmuPrefix
   278  }
   279  
   280  func parsePMUs(group Group, pmus uncorePMUs, customEvents map[Event]*CustomEvent) (map[Event]uncorePMUs, error) {
   281  	eventPMUs := make(map[Event]uncorePMUs)
   282  	for _, event := range group.events {
   283  		_, prefix := parseEventName(string(event))
   284  		custom, ok := customEvents[event]
   285  		if ok {
   286  			if custom.Type != 0 {
   287  				pmu, err := getPMU(pmus, custom.Type)
   288  				if err != nil {
   289  					return nil, err
   290  				}
   291  				eventPMUs[event] = uncorePMUs{pmu.name: *pmu}
   292  				continue
   293  			}
   294  		}
   295  		eventPMUs[event] = obtainPMUs(prefix, pmus)
   296  	}
   297  
   298  	return eventPMUs, nil
   299  }
   300  
   301  func obtainPMUs(want string, gotPMUs uncorePMUs) uncorePMUs {
   302  	pmus := make(uncorePMUs)
   303  	if want == "" {
   304  		return pmus
   305  	}
   306  	for _, pmu := range gotPMUs {
   307  		if strings.HasPrefix(pmu.name, want) {
   308  			pmus[pmu.name] = pmu
   309  		}
   310  	}
   311  
   312  	return pmus
   313  }
   314  
   315  func parseUncoreEvents(events Events) map[Event]*CustomEvent {
   316  	eventToCustomEvent := map[Event]*CustomEvent{}
   317  	for _, group := range events.Events {
   318  		for _, uncoreEvent := range group.events {
   319  			for _, customEvent := range events.CustomEvents {
   320  				if uncoreEvent == customEvent.Name {
   321  					eventToCustomEvent[customEvent.Name] = &customEvent
   322  					break
   323  				}
   324  			}
   325  		}
   326  	}
   327  
   328  	return eventToCustomEvent
   329  }
   330  
   331  func (c *uncoreCollector) Destroy() {
   332  	c.cpuFilesLock.Lock()
   333  	defer c.cpuFilesLock.Unlock()
   334  
   335  	for groupIndex := range c.cpuFiles {
   336  		c.deleteGroup(groupIndex)
   337  		delete(c.cpuFiles, groupIndex)
   338  	}
   339  }
   340  
   341  func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error {
   342  	klog.V(5).Info("Attempting to update uncore perf_event stats")
   343  
   344  	for _, groupPMUs := range c.cpuFiles {
   345  		for pmu, group := range groupPMUs {
   346  			for cpu, file := range group.cpuFiles[group.leaderName] {
   347  				stat, err := readPerfUncoreStat(file, group, cpu, pmu, c.cpuToSocket)
   348  				if err != nil {
   349  					klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", group.leaderName, cpu, pmu, err.Error())
   350  					continue
   351  				}
   352  
   353  				stats.PerfUncoreStats = append(stats.PerfUncoreStats, stat...)
   354  			}
   355  		}
   356  	}
   357  
   358  	return nil
   359  }
   360  
   361  func (c *uncoreCollector) setupEvent(name string, pmus uncorePMUs, groupIndex int, leaderFileDescriptors map[string]map[uint32]int) error {
   362  	if !isLibpfmInitialized {
   363  		return fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
   364  	}
   365  
   366  	klog.V(5).Infof("Setting up uncore perf event %s", name)
   367  
   368  	config, err := readPerfEventAttr(name, pfmGetOsEventEncoding)
   369  	if err != nil {
   370  		C.free((unsafe.Pointer)(config))
   371  		return err
   372  	}
   373  
   374  	// Register event for all memory controllers.
   375  	for _, pmu := range pmus {
   376  		config.Type = pmu.typeOf
   377  		isGroupLeader := leaderFileDescriptors[pmu.name][pmu.cpus[0]] == groupLeaderFileDescriptor
   378  		setAttributes(config, isGroupLeader)
   379  		leaderFileDescriptors[pmu.name], err = c.registerEvent(eventInfo{name, config, uncorePID, groupIndex, isGroupLeader}, pmu, leaderFileDescriptors[pmu.name])
   380  		if err != nil {
   381  			return err
   382  		}
   383  	}
   384  
   385  	// Clean memory allocated by C code.
   386  	C.free(unsafe.Pointer(config))
   387  
   388  	return nil
   389  }
   390  
   391  func (c *uncoreCollector) registerEvent(eventInfo eventInfo, pmu pmu, leaderFileDescriptors map[uint32]int) (map[uint32]int, error) {
   392  	newLeaderFileDescriptors := make(map[uint32]int)
   393  	isGroupLeader := false
   394  	for _, cpu := range pmu.cpus {
   395  		groupFd, flags := leaderFileDescriptors[cpu], 0
   396  		fd, err := c.perfEventOpen(eventInfo.config, eventInfo.pid, int(cpu), groupFd, flags)
   397  		if err != nil {
   398  			return nil, fmt.Errorf("setting up perf event %#v failed: %q | (pmu: %q, groupFd: %d, cpu: %d)", eventInfo.config, err, pmu, groupFd, cpu)
   399  		}
   400  		perfFile := os.NewFile(uintptr(fd), eventInfo.name)
   401  		if perfFile == nil {
   402  			return nil, fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
   403  		}
   404  
   405  		c.addEventFile(eventInfo.groupIndex, eventInfo.name, pmu.name, int(cpu), perfFile)
   406  
   407  		// If group leader, save fd for others.
   408  		if leaderFileDescriptors[cpu] == groupLeaderFileDescriptor {
   409  			newLeaderFileDescriptors[cpu] = fd
   410  			isGroupLeader = true
   411  		}
   412  	}
   413  
   414  	if isGroupLeader {
   415  		return newLeaderFileDescriptors, nil
   416  	}
   417  	return leaderFileDescriptors, nil
   418  }
   419  
   420  func (c *uncoreCollector) addEventFile(index int, name string, pmu string, cpu int, perfFile *os.File) {
   421  	_, ok := c.cpuFiles[index]
   422  	if !ok {
   423  		c.cpuFiles[index] = map[string]group{}
   424  	}
   425  
   426  	_, ok = c.cpuFiles[index][pmu]
   427  	if !ok {
   428  		c.cpuFiles[index][pmu] = group{
   429  			cpuFiles:   map[string]map[int]readerCloser{},
   430  			leaderName: name,
   431  		}
   432  	}
   433  
   434  	_, ok = c.cpuFiles[index][pmu].cpuFiles[name]
   435  	if !ok {
   436  		c.cpuFiles[index][pmu].cpuFiles[name] = map[int]readerCloser{}
   437  	}
   438  
   439  	c.cpuFiles[index][pmu].cpuFiles[name][cpu] = perfFile
   440  
   441  	// Check if name is already stored.
   442  	for _, have := range c.cpuFiles[index][pmu].names {
   443  		if name == have {
   444  			return
   445  		}
   446  	}
   447  
   448  	// Otherwise save it.
   449  	c.cpuFiles[index][pmu] = group{
   450  		cpuFiles:   c.cpuFiles[index][pmu].cpuFiles,
   451  		names:      append(c.cpuFiles[index][pmu].names, name),
   452  		leaderName: c.cpuFiles[index][pmu].leaderName,
   453  	}
   454  }
   455  
   456  func (c *uncoreCollector) setupRawEvent(event *CustomEvent, pmus uncorePMUs, groupIndex int, leaderFileDescriptors map[string]map[uint32]int) error {
   457  	klog.V(5).Infof("Setting up raw perf uncore event %#v", event)
   458  
   459  	for _, pmu := range pmus {
   460  		newEvent := CustomEvent{
   461  			Type:   pmu.typeOf,
   462  			Config: event.Config,
   463  			Name:   event.Name,
   464  		}
   465  		config := createPerfEventAttr(newEvent)
   466  		isGroupLeader := leaderFileDescriptors[pmu.name][pmu.cpus[0]] == groupLeaderFileDescriptor
   467  		setAttributes(config, isGroupLeader)
   468  		var err error
   469  		leaderFileDescriptors[pmu.name], err = c.registerEvent(eventInfo{string(newEvent.Name), config, uncorePID, groupIndex, isGroupLeader}, pmu, leaderFileDescriptors[pmu.name])
   470  		if err != nil {
   471  			return err
   472  		}
   473  	}
   474  
   475  	return nil
   476  }
   477  
   478  func (c *uncoreCollector) deleteGroup(groupIndex int) {
   479  	groupPMUs := c.cpuFiles[groupIndex]
   480  	for pmu, group := range groupPMUs {
   481  		for name, cpus := range group.cpuFiles {
   482  			for cpu, file := range cpus {
   483  				klog.V(5).Infof("Closing uncore perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
   484  				err := file.Close()
   485  				if err != nil {
   486  					klog.Warningf("Unable to close perf event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu)
   487  				}
   488  			}
   489  			delete(group.cpuFiles, name)
   490  		}
   491  		delete(groupPMUs, pmu)
   492  	}
   493  	delete(c.cpuFiles, groupIndex)
   494  }
   495  
   496  func readPerfUncoreStat(file readerCloser, group group, cpu int, pmu string, cpuToSocket map[int]int) ([]info.PerfUncoreStat, error) {
   497  	values, err := getPerfValues(file, group)
   498  	if err != nil {
   499  		return nil, err
   500  	}
   501  
   502  	socket, ok := cpuToSocket[cpu]
   503  	if !ok {
   504  		// Socket is unknown.
   505  		socket = -1
   506  	}
   507  
   508  	perfUncoreStats := make([]info.PerfUncoreStat, len(values))
   509  	for i, value := range values {
   510  		klog.V(5).Infof("Read metric for event %q for cpu %d from pmu %q: %d", value.Name, cpu, pmu, value.Value)
   511  		perfUncoreStats[i] = info.PerfUncoreStat{
   512  			PerfValue: value,
   513  			Socket:    socket,
   514  			PMU:       pmu,
   515  		}
   516  	}
   517  
   518  	return perfUncoreStats, nil
   519  }