github.com/dylandreimerink/gobpfld@v0.6.1-0.20220205171531-e79c330ad608/internal/syscall/perf.go (about) 1 package syscall 2 3 import ( 4 "syscall" 5 "unsafe" 6 7 "golang.org/x/sys/unix" 8 ) 9 10 // PerfEventAttr is the go version of the perf_event_attr struct as defined by the kernel. 11 // https://elixir.bootlin.com/linux/v5.14.14/source/include/uapi/linux/perf_event.h#L338 12 type PerfEventAttr struct { 13 Type PerfType 14 Size uint32 15 Config uint64 16 // union of sample_period and sample_frequency 17 SamplePeriodFreq uint64 18 SampleType uint64 19 AttrFlags PerfAttrFlags 20 // union of wakeup_events and wakeup_watermark 21 WakeupEventsWatermark uint32 22 BPType uint32 23 // union of bp_addr, kprobe_func, uprobe_path, and config1 24 BPAddr uintptr 25 // union of bp_len, kprobe_addr, probe_offset, and config2 26 BPLen uint64 27 // Unum of perf_branch_sample_type 28 BranchSampleType uint64 29 // Defines set of user regs to dump on samples. 30 // See asm/perf_regs.h for details. 31 SampleRegsUser uint64 32 // Defines size of the user stack to dump on samples. 33 SampleStackUser uint32 34 ClockID int32 35 // Defines set of regs to dump for each sample 36 // state captured on: 37 // - precise = 0: PMU interrupt 38 // - precise > 0: sampled instruction 39 // 40 // See asm/perf_regs.h for details. 41 SampleRegsIntr uint64 42 // Wakeup watermark for AUX area 43 AUXWatermark uint32 44 SampleMaxStack uint16 45 // __reserved_2 46 _ uint16 47 AUXSampleSize uint32 48 // __reserved_3 49 _ uint32 50 SigData uint64 51 } 52 53 const AttrSize = uint32(unsafe.Sizeof(PerfEventAttr{})) 54 55 // PerfType https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/perf_event.h#L32 56 type PerfType uint32 57 58 const ( 59 // PERF_TYPE_HARDWARE This indicates one of the "generalized" hardware events 60 // provided by the kernel. See the config field definition 61 // for more details. 62 PERF_TYPE_HARDWARE PerfType = iota 63 64 // PERF_TYPE_SOFTWARE This indicates one of the software-defined events provided 65 // by the kernel (even if no hardware support is 66 // available). 67 PERF_TYPE_SOFTWARE 68 69 // PERF_TYPE_TRACEPOINT This indicates a tracepoint provided by the kernel tracepoint infrastructure. 70 PERF_TYPE_TRACEPOINT 71 72 // PERF_TYPE_HW_CACHE This indicates a hardware cache event. This has a special encoding, 73 // described in the config field definition. 74 PERF_TYPE_HW_CACHE 75 76 // PERF_TYPE_RAW This indicates a "raw" implementation-specific event in 77 // the config field. 78 PERF_TYPE_RAW 79 80 // PERF_TYPE_BREAKPOINT This indicates a hardware breakpoint as provided by the CPU. 81 // Breakpoints can be read/write accesses to an address as well as execution of an instruction address. 82 PERF_TYPE_BREAKPOINT 83 ) 84 85 // PerfAttrFlags are used to pass a lot of boolean flags efficiently to the kerenl 86 type PerfAttrFlags uint64 87 88 const ( 89 // PerfAttrFlagsDisabled off by default 90 PerfAttrFlagsDisabled PerfAttrFlags = 1 << iota 91 // PerfAttrFlagsInherit children inherit it 92 PerfAttrFlagsInherit 93 // PerfAttrFlagsPinned must always be on PMU 94 PerfAttrFlagsPinned 95 // PerfAttrFlagsExclusive only group on PMU 96 PerfAttrFlagsExclusive 97 // PerfAttrFlagsExcludeUser don't count user 98 PerfAttrFlagsExcludeUser 99 // PerfAttrFlagsExcludeKernel ditto kernel 100 PerfAttrFlagsExcludeKernel 101 // PerfAttrFlagsExcludeHV ditto hypervisor 102 PerfAttrFlagsExcludeHV 103 // PerfAttrFlagsExcludeIdle don't count when idle 104 PerfAttrFlagsExcludeIdle 105 // PerfAttrFlagsMmap include mmap data 106 PerfAttrFlagsMmap 107 // PerfAttrFlagsComm include comm data 108 PerfAttrFlagsComm 109 // PerfAttrFlagsFreq use freq, not period 110 PerfAttrFlagsFreq 111 // PerfAttrFlagsInheritStat per task counts 112 PerfAttrFlagsInheritStat 113 // PerfAttrFlagsEnableOnExec next exec enables 114 PerfAttrFlagsEnableOnExec 115 // PerfAttrFlagsTask trace fork/exit 116 PerfAttrFlagsTask 117 // PerfAttrFlagsWatermark wakeup_watermark 118 PerfAttrFlagsWatermark 119 // PerfAttrFlagsPreciseIPConstantSkid SAMPLE_IP must have constant skid, See also PERF_RECORD_MISC_EXACT_IP 120 PerfAttrFlagsPreciseIPConstantSkid PerfAttrFlags = 1 << 15 121 // PerfAttrFlagsPreciseIPRequestZeroSkid SAMPLE_IP requested to have 0 skid, See also PERF_RECORD_MISC_EXACT_IP 122 PerfAttrFlagsPreciseIPRequestZeroSkid PerfAttrFlags = 1 << 16 123 // PerfAttrFlagsPreciseIPRequireZeroSkid SAMPLE_IP must have 0 skid, See also PERF_RECORD_MISC_EXACT_IP 124 PerfAttrFlagsPreciseIPRequireZeroSkid PerfAttrFlags = 1<<16 + 1<<15 125 ) 126 127 type PerfEventOpenFlags uintptr 128 129 const ( 130 // PerfEventOpenFDNoGroup This flag tells the event to ignore the group_fd parameter ex‐ 131 // cept for the purpose of setting up output redirection using the 132 // PERF_FLAG_FD_OUTPUT flag. 133 PerfEventOpenFDNoGroup PerfEventOpenFlags = 1 << iota 134 135 // PerfEventOpenFDOutput This flag re-routes the event's sampled output to instead be in‐ 136 // cluded in the mmap buffer of the event specified by group_fd. 137 PerfEventOpenFDOutput 138 139 // PerfEventOpenPIDCgroup This flag tells the event to ignore the group_fd parameter ex‐ 140 // cept for the purpose of setting up output redirection using the 141 // PERF_FLAG_FD_OUTPUT flag. 142 PerfEventOpenPIDCgroup 143 144 // PerfEventOpenFDCloseOnExit This flag enables the close-on-exec flag for the created event 145 // file descriptor, so that the file descriptor is automatically 146 // closed on execve(2). Setting the close-on-exec flags at cre‐ 147 // ation time, rather than later with fcntl(2), avoids potential 148 // race conditions where the calling thread invokes 149 // perf_event_open() and fcntl(2) at the same time as another 150 // thread calls fork(2) then execve(2). 151 PerfEventOpenFDCloseOnExit 152 ) 153 154 // PerfEventOpen is a wrapper around the perf_event_open syscall. 155 func PerfEventOpen(attr PerfEventAttr, pid, cpu, groupFD int, flags PerfEventOpenFlags) (uintptr, error) { 156 fd, _, errno := unix.Syscall6( 157 unix.SYS_PERF_EVENT_OPEN, 158 uintptr(unsafe.Pointer(&attr)), 159 uintptr(pid), 160 uintptr(cpu), 161 uintptr(groupFD), 162 uintptr(flags), 163 0, 164 ) 165 if errno != 0 { 166 return 0, &Error{ 167 Errno: errno, 168 Err: perfEventOpenErrors[errno], 169 } 170 } 171 172 return fd, nil 173 } 174 175 var perfEventOpenErrors = map[syscall.Errno]string{ 176 unix.E2BIG: "The perf_event_attr size value is too small (smaller " + 177 "than PERF_ATTR_SIZE_VER0), too big (larger than the page size), " + 178 "or larger than the kernel supports and the extra bytes are not " + 179 "zero. When E2BIG is returned, the perf_event_attr size field is " + 180 "overwritten by the kernel to be the size of the structure it was " + 181 "expecting.", 182 183 unix.EACCES: "The requested event requires CAP_PERFMON (since " + 184 "Linux 5.8) or CAP_SYS_ADMIN permissions (or a more permissive " + 185 "perf_event paranoid setting). Some common cases where an un‐ " + 186 "privileged process may encounter this error: attaching to a " + 187 "process owned by a different user; monitoring all processes on a " + 188 "given CPU (i.e., specifying the pid argument as -1); and not " + 189 "setting exclude_kernel when the paranoid setting requires it.", 190 191 unix.EBADF: "The group_fd file descriptor is not valid, or, if " + 192 "PERF_FLAG_PID_CGROUP is set, the cgroup file descriptor in pid " + 193 "is not valid.", 194 195 unix.EBUSY: "Another event already has exclusive access to the PMU.", 196 197 unix.EFAULT: "The attr pointer points at an invalid memory address.", 198 199 unix.EINVAL: "The specified event is invalid. There are many pos‐ " + 200 "sible reasons for this. A not-exhaustive list: sample_freq is " + 201 "higher than the maximum setting; the cpu to monitor does not ex‐ " + 202 "ist; read_format is out of range; sample_type is out of range; " + 203 "the flags value is out of range; exclusive or pinned set and the " + 204 "event is not a group leader; the event config values are out of " + 205 "range or set reserved bits; the generic event selected is not " + 206 "supported; or there is not enough room to add the selected " + 207 "event.", 208 209 unix.EINTR: "Returned when trying to mix perf and ftrace handling for a uprobe.", 210 211 unix.EMFILE: "Each opened event uses one file descriptor. If a large number " + 212 "of events are opened, the per-process limit on the number of " + 213 "open file descriptors will be reached, and no more events can be " + 214 "created.", 215 216 unix.ENODEV: "Returned when the event involves a feature not supported by the current CPU.", 217 218 unix.ENOENT: "Returned if the type setting is not valid. " + 219 "This error is also returned for some unsupported generic events.", 220 221 unix.ENOSPC: "Prior to Linux 3.3, if there was not enough room for the event, " + 222 "ENOSPC was returned. In Linux 3.3, this was changed to EINVAL. " + 223 "ENOSPC is still returned if you try to add more breakpoint " + 224 "events than supported by the hardware.", 225 226 unix.ENOSYS: "Returned if PERF_SAMPLE_STACK_USER is set in sample_type and it " + 227 "is not supported by hardware.", 228 229 unix.EOPNOTSUPP: "Returned if an event requiring a specific hardware feature is " + 230 "requested but there is no hardware support. This includes re‐ " + 231 "questing low-skid events if not supported, branch tracing if it " + 232 "is not available, sampling if no PMU interrupt is available, and " + 233 "branch stacks for software events.", 234 235 unix.EOVERFLOW: "(since Linux 4.8) " + 236 "Returned if PERF_SAMPLE_CALLCHAIN is requested and sam‐ " + 237 "ple_max_stack is larger than the maximum specified in " + 238 "/proc/sys/kernel/perf_event_max_stack.", 239 240 unix.EPERM: "Returned on many (but not all) architectures when an unsupported " + 241 "exclude_hv, exclude_idle, exclude_user, or exclude_kernel set‐ " + 242 "ting is specified. \n" + 243 "It can also happen, as with EACCES, when the requested event re‐ " + 244 "quires CAP_PERFMON (since Linux 5.8) or CAP_SYS_ADMIN permis‐ " + 245 "sions (or a more permissive perf_event paranoid setting). This " + 246 "includes setting a breakpoint on a kernel address, and (since " + 247 "Linux 3.13) setting a kernel function-trace tracepoint.", 248 249 unix.ESRCH: "Returned if attempting to attach to a process that does not exist.", 250 }