bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/procstats_linux.go (about) 1 package collectors 2 3 import ( 4 "fmt" 5 "regexp" 6 "strconv" 7 "strings" 8 "time" 9 10 "bosun.org/metadata" 11 "bosun.org/opentsdb" 12 ) 13 14 func init() { 15 collectors = append(collectors, &IntervalCollector{F: c_procstats_linux}) 16 collectors = append(collectors, &IntervalCollector{F: c_interrupts_linux, Interval: time.Minute}) 17 collectors = append(collectors, &IntervalCollector{F: c_vmstat_linux, Interval: time.Minute}) 18 } 19 20 var uptimeRE = regexp.MustCompile(`(\S+)\s+(\S+)`) 21 var meminfoRE = regexp.MustCompile(`(\w+):\s+(\d+)\s+(\w+)`) 22 var vmstatRE = regexp.MustCompile(`(\w+)\s+(\d+)`) 23 var statRE = regexp.MustCompile(`(\w+)\s+(.*)`) 24 var statCPURE = regexp.MustCompile(`cpu(\d+)`) 25 var cpuspeedRE = regexp.MustCompile(`cpu MHz\s+: ([\d.]+)`) 26 var loadavgRE = regexp.MustCompile(`(\S+)\s+(\S+)\s+(\S+)\s+(\d+)/(\d+)\s+`) 27 var inoutRE = regexp.MustCompile(`(.*)(in|out)`) 28 29 var CPU_FIELDS = []string{ 30 "user", 31 "nice", 32 "system", 33 "idle", 34 "iowait", 35 "irq", 36 "softirq", 37 "steal", 38 "guest", 39 "guest_nice", 40 } 41 42 func c_procstats_linux() (opentsdb.MultiDataPoint, error) { 43 var md opentsdb.MultiDataPoint 44 var Error error 45 if err := readLine("/proc/uptime", func(s string) error { 46 m := uptimeRE.FindStringSubmatch(s) 47 if m == nil { 48 return nil 49 } 50 Add(&md, "linux.uptime_total", m[1], nil, metadata.Gauge, metadata.Second, osSystemUptimeDesc) 51 Add(&md, "linux.uptime_now", m[2], nil, metadata.Gauge, metadata.Second, "") 52 Add(&md, osSystemUptime, m[1], nil, metadata.Gauge, metadata.Second, osSystemUptimeDesc) 53 return nil 54 }); err != nil { 55 Error = err 56 } 57 mem := make(map[string]int64) 58 if err := readLine("/proc/meminfo", func(s string) error { 59 m := meminfoRE.FindStringSubmatch(s) 60 if m == nil { 61 return nil 62 } 63 i, err := strconv.ParseInt(m[2], 10, 64) 64 if err != nil { 65 return err 66 } 67 mem[m[1]] = i 68 Add(&md, "linux.mem."+strings.ToLower(m[1]), m[2], nil, metadata.Gauge, metadata.KBytes, "") 69 return nil 70 }); err != nil { 71 Error = err 72 } 73 bufferCacheSlab := mem["Buffers"] + mem["Cached"] + mem["Slab"] 74 memTotal := mem["MemTotal"] 75 memFree := mem["MemFree"] 76 // MemAvailable was introduced in the 3.14 kernel and is a more accurate measure of available memory 77 // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=34e431b0a 78 // We used this metric if it is available 79 available, availableIsAvailable := mem["MemAvailable"] 80 Add(&md, osMemTotal, memTotal*1024, nil, metadata.Gauge, metadata.Bytes, osMemTotalDesc) 81 freeValue := memFree + bufferCacheSlab 82 usedValue := memTotal - memFree - bufferCacheSlab 83 if availableIsAvailable { 84 freeValue = available 85 usedValue = memTotal - available 86 } 87 Add(&md, osMemFree, freeValue*1024, nil, metadata.Gauge, metadata.Bytes, osMemFreeDesc) 88 Add(&md, osMemUsed, usedValue*1024, nil, metadata.Gauge, metadata.Bytes, osMemUsedDesc) 89 if memTotal != 0 { 90 Add(&md, osMemPctFree, (float64(freeValue))/float64(memTotal)*100, nil, metadata.Gauge, metadata.Pct, osMemFreeDesc) 91 } 92 93 num_cores := 0 94 var t_util float64 95 cpu_stat_desc := map[string]string{ 96 "user": "Normal processes executing in user mode.", 97 "nice": "Niced processes executing in user mode.", 98 "system": "Processes executing in kernel mode.", 99 "idle": "Twiddling thumbs.", 100 "iowait": "Waiting for I/O to complete.", 101 "irq": "Servicing interrupts.", 102 "softirq": "Servicing soft irqs.", 103 "steal": "Involuntary wait.", 104 "guest": "Running a guest vm.", 105 "guest_nice": "Running a niced guest vm.", 106 } 107 if err := readLine("/proc/stat", func(s string) error { 108 m := statRE.FindStringSubmatch(s) 109 if m == nil { 110 return nil 111 } 112 if strings.HasPrefix(m[1], "cpu") { 113 metric_percpu := "" 114 tag_cpu := "" 115 cpu_m := statCPURE.FindStringSubmatch(m[1]) 116 if cpu_m != nil { 117 num_cores += 1 118 metric_percpu = ".percpu" 119 tag_cpu = cpu_m[1] 120 } 121 fields := strings.Fields(m[2]) 122 for i, value := range fields { 123 if i >= len(CPU_FIELDS) { 124 break 125 } 126 tags := opentsdb.TagSet{ 127 "type": CPU_FIELDS[i], 128 } 129 if tag_cpu != "" { 130 tags["cpu"] = tag_cpu 131 } 132 Add(&md, "linux.cpu"+metric_percpu, value, tags, metadata.Counter, metadata.CHz, cpu_stat_desc[CPU_FIELDS[i]]) 133 } 134 if metric_percpu == "" { 135 if len(fields) < 3 { 136 return nil 137 } 138 user, err := strconv.ParseFloat(fields[0], 64) 139 if err != nil { 140 return nil 141 } 142 nice, err := strconv.ParseFloat(fields[1], 64) 143 if err != nil { 144 return nil 145 } 146 system, err := strconv.ParseFloat(fields[2], 64) 147 if err != nil { 148 return nil 149 } 150 t_util = user + nice + system 151 } 152 } else if m[1] == "intr" { 153 Add(&md, "linux.intr", strings.Fields(m[2])[0], nil, metadata.Counter, metadata.Interupt, "") 154 } else if m[1] == "ctxt" { 155 Add(&md, "linux.ctxt", m[2], nil, metadata.Counter, metadata.ContextSwitch, "") 156 } else if m[1] == "processes" { 157 Add(&md, "linux.processes", m[2], nil, metadata.Counter, metadata.Process, 158 "The number of processes and threads created, which includes (but is not limited to) those created by calls to the fork() and clone() system calls.") 159 } else if m[1] == "procs_blocked" { 160 Add(&md, "linux.procs_blocked", m[2], nil, metadata.Gauge, metadata.Process, "The number of processes currently blocked, waiting for I/O to complete.") 161 } 162 return nil 163 }); err != nil { 164 Error = err 165 } 166 if num_cores != 0 && t_util != 0 { 167 Add(&md, osCPU, t_util/float64(num_cores), nil, metadata.Counter, metadata.Pct, "") 168 } 169 cpuinfo_index := 0 170 if err := readLine("/proc/cpuinfo", func(s string) error { 171 m := cpuspeedRE.FindStringSubmatch(s) 172 if m == nil { 173 return nil 174 } 175 tags := opentsdb.TagSet{"cpu": strconv.Itoa(cpuinfo_index)} 176 Add(&md, osCPUClock, m[1], tags, metadata.Gauge, metadata.MHz, osCPUClockDesc) 177 Add(&md, "linux.cpu.clock", m[1], tags, metadata.Gauge, metadata.MHz, osCPUClockDesc) 178 cpuinfo_index += 1 179 return nil 180 }); err != nil { 181 Error = err 182 } 183 if err := readLine("/proc/loadavg", func(s string) error { 184 m := loadavgRE.FindStringSubmatch(s) 185 if m == nil { 186 return nil 187 } 188 Add(&md, "linux.loadavg_1_min", m[1], nil, metadata.Gauge, metadata.Load, "") 189 Add(&md, "linux.loadavg_5_min", m[2], nil, metadata.Gauge, metadata.Load, "") 190 Add(&md, "linux.loadavg_15_min", m[3], nil, metadata.Gauge, metadata.Load, "") 191 Add(&md, "linux.loadavg_runnable", m[4], nil, metadata.Gauge, metadata.Process, "") 192 Add(&md, "linux.loadavg_total_threads", m[5], nil, metadata.Gauge, metadata.Process, "") 193 return nil 194 }); err != nil { 195 Error = err 196 } 197 if err := readLine("/proc/sys/kernel/random/entropy_avail", func(s string) error { 198 Add(&md, "linux.entropy_avail", strings.TrimSpace(s), nil, metadata.Gauge, metadata.Entropy, "The remaing amount of entropy available to the system. If it is low or hitting zero processes might be blocked waiting for extropy") 199 return nil 200 }); err != nil { 201 Error = err 202 } 203 204 if err := readLine("/proc/net/sockstat", func(s string) error { 205 cols := strings.Fields(s) 206 switch cols[0] { 207 case "sockets:": 208 if len(cols) < 3 { 209 return fmt.Errorf("sockstat: error parsing sockets line") 210 } 211 Add(&md, "linux.net.sockets.used", cols[2], nil, metadata.Gauge, metadata.Socket, "") 212 case "TCP:": 213 if len(cols) < 11 { 214 return fmt.Errorf("sockstat: error parsing tcp line") 215 } 216 Add(&md, "linux.net.sockets.tcp_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "") 217 Add(&md, "linux.net.sockets.tcp_orphaned", cols[4], nil, metadata.Gauge, metadata.Socket, "") 218 Add(&md, "linux.net.sockets.tcp_time_wait", cols[6], nil, metadata.Gauge, metadata.Socket, "") 219 Add(&md, "linux.net.sockets.tcp_allocated", cols[8], nil, metadata.Gauge, metadata.None, "") 220 Add(&md, "linux.net.sockets.tcp_mem", cols[10], nil, metadata.Gauge, metadata.None, "") 221 case "UDP:": 222 if len(cols) < 5 { 223 return fmt.Errorf("sockstat: error parsing udp line") 224 } 225 Add(&md, "linux.net.sockets.udp_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "") 226 Add(&md, "linux.net.sockets.udp_mem", cols[4], nil, metadata.Gauge, metadata.Page, "") 227 case "UDPLITE:": 228 if len(cols) < 3 { 229 return fmt.Errorf("sockstat: error parsing udplite line") 230 } 231 Add(&md, "linux.net.sockets.udplite_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "") 232 case "RAW:": 233 if len(cols) < 3 { 234 return fmt.Errorf("sockstat: error parsing raw line") 235 } 236 Add(&md, "linux.net.sockets.raw_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "") 237 case "FRAG:": 238 if len(cols) < 5 { 239 return fmt.Errorf("sockstat: error parsing frag line") 240 } 241 Add(&md, "linux.net.sockets.frag_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "") 242 Add(&md, "linux.net.sockets.frag_mem", cols[4], nil, metadata.Gauge, metadata.Bytes, "") 243 } 244 return nil 245 }); err != nil { 246 Error = err 247 } 248 ln := 0 249 var headers []string 250 if err := readLine("/proc/net/netstat", func(s string) error { 251 cols := strings.Fields(s) 252 if ln%2 == 0 { 253 headers = cols 254 } else { 255 if len(cols) < 1 || len(cols) != len(headers) { 256 return fmt.Errorf("netstat: parsing failed") 257 } 258 root := strings.ToLower(strings.TrimSuffix(headers[0], "Ext:")) 259 for i, v := range cols[1:] { 260 i++ 261 m := "linux.net.stat." + root + "." + strings.TrimPrefix(strings.ToLower(headers[i]), "tcp") 262 Add(&md, m, v, nil, metadata.Counter, metadata.None, "") 263 } 264 } 265 ln += 1 266 return nil 267 }); err != nil { 268 Error = err 269 } 270 ln = 0 271 if err := readLine("/proc/net/snmp", func(s string) error { 272 ln++ 273 if ln%2 != 0 { 274 f := strings.Fields(s) 275 if len(f) < 2 { 276 return fmt.Errorf("Failed to parse header line") 277 } 278 headers = f 279 } else { 280 values := strings.Fields(s) 281 if len(values) != len(headers) { 282 return fmt.Errorf("Mismatched header and value length") 283 } 284 proto := strings.ToLower(strings.TrimSuffix(values[0], ":")) 285 for i, v := range values { 286 if i == 0 { 287 continue 288 } 289 var stype metadata.RateType = metadata.Counter 290 stat := strings.ToLower(headers[i]) 291 if strings.HasPrefix(stat, "rto") { 292 stype = metadata.Gauge 293 } 294 Add(&md, "linux.net.stat."+proto+"."+stat, v, nil, stype, metadata.None, "") 295 } 296 } 297 return nil 298 }); err != nil { 299 Error = err 300 } 301 if err := readLine("/proc/sys/fs/file-nr", func(s string) error { 302 f := strings.Fields(s) 303 if len(f) != 3 { 304 return fmt.Errorf("unexpected number of fields") 305 } 306 v, err := strconv.ParseInt(f[0], 10, 64) 307 if err != nil { 308 return err 309 } 310 Add(&md, "linux.fs.open", v, nil, metadata.Gauge, metadata.Count, "The number of files presently open.") 311 return nil 312 }); err != nil { 313 Error = err 314 } 315 return md, Error 316 } 317 318 func c_interrupts_linux() (opentsdb.MultiDataPoint, error) { 319 var md opentsdb.MultiDataPoint 320 irq_type_desc := map[string]string{ 321 "NMI": "Non-maskable interrupts.", 322 "LOC": "Local timer interrupts.", 323 "SPU": "Spurious interrupts.", 324 "PMI": "Performance monitoring interrupts.", 325 "IWI": "IRQ work interrupts.", 326 "RES": "Rescheduling interrupts.", 327 "CAL": "Funcation call interupts.", 328 "TLB": "TLB (translation lookaside buffer) shootdowns.", 329 "TRM": "Thermal event interrupts.", 330 "THR": "Threshold APIC interrupts.", 331 "MCE": "Machine check exceptions.", 332 "MCP": "Machine Check polls.", 333 } 334 num_cpus := 0 335 if err := readLine("/proc/interrupts", func(s string) error { 336 cols := strings.Fields(s) 337 if num_cpus == 0 { 338 num_cpus = len(cols) 339 return nil 340 } else if len(cols) < 2 { 341 return nil 342 } 343 irq_type := strings.TrimRight(cols[0], ":") 344 if !IsAlNum(irq_type) { 345 return nil 346 } 347 if IsDigit(irq_type) { 348 if cols[len(cols)-2] == "PCI-MSI-edge" && strings.Contains(cols[len(cols)-1], "eth") { 349 irq_type = cols[len(cols)-1] 350 } else { 351 // Interrupt type is just a number, ignore. 352 return nil 353 } 354 } 355 for i, val := range cols[1:] { 356 if i >= num_cpus || !IsDigit(val) { 357 // All values read, remaining cols contain textual description. 358 break 359 } 360 Add(&md, "linux.interrupts", val, opentsdb.TagSet{"type": irq_type, "cpu": strconv.Itoa(i)}, metadata.Counter, metadata.Interupt, irq_type_desc[irq_type]) 361 } 362 return nil 363 }); err != nil { 364 return nil, err 365 } 366 return md, nil 367 } 368 369 func c_vmstat_linux() (opentsdb.MultiDataPoint, error) { 370 var md opentsdb.MultiDataPoint 371 if err := readLine("/proc/vmstat", func(s string) error { 372 m := vmstatRE.FindStringSubmatch(s) 373 if m == nil { 374 return nil 375 } 376 switch m[1] { 377 case "pgpgin", "pgpgout", "pswpin", "pswpout", "pgfault", "pgmajfault": 378 mio := inoutRE.FindStringSubmatch(m[1]) 379 if mio != nil { 380 Add(&md, "linux.mem."+mio[1], m[2], opentsdb.TagSet{"direction": mio[2]}, metadata.Counter, metadata.Page, "") 381 } else { 382 Add(&md, "linux.mem."+m[1], m[2], nil, metadata.Counter, metadata.Page, "") 383 } 384 default: 385 Add(&md, "linux.mem."+m[1], m[2], nil, metadata.Counter, metadata.None, "") 386 } 387 return nil 388 }); err != nil { 389 return nil, err 390 } 391 return md, nil 392 }