bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/procstats_linux.go (about)

     1  package collectors
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  
    10  	"bosun.org/metadata"
    11  	"bosun.org/opentsdb"
    12  )
    13  
    14  func init() {
    15  	collectors = append(collectors, &IntervalCollector{F: c_procstats_linux})
    16  	collectors = append(collectors, &IntervalCollector{F: c_interrupts_linux, Interval: time.Minute})
    17  	collectors = append(collectors, &IntervalCollector{F: c_vmstat_linux, Interval: time.Minute})
    18  }
    19  
    20  var uptimeRE = regexp.MustCompile(`(\S+)\s+(\S+)`)
    21  var meminfoRE = regexp.MustCompile(`(\w+):\s+(\d+)\s+(\w+)`)
    22  var vmstatRE = regexp.MustCompile(`(\w+)\s+(\d+)`)
    23  var statRE = regexp.MustCompile(`(\w+)\s+(.*)`)
    24  var statCPURE = regexp.MustCompile(`cpu(\d+)`)
    25  var cpuspeedRE = regexp.MustCompile(`cpu MHz\s+: ([\d.]+)`)
    26  var loadavgRE = regexp.MustCompile(`(\S+)\s+(\S+)\s+(\S+)\s+(\d+)/(\d+)\s+`)
    27  var inoutRE = regexp.MustCompile(`(.*)(in|out)`)
    28  
    29  var CPU_FIELDS = []string{
    30  	"user",
    31  	"nice",
    32  	"system",
    33  	"idle",
    34  	"iowait",
    35  	"irq",
    36  	"softirq",
    37  	"steal",
    38  	"guest",
    39  	"guest_nice",
    40  }
    41  
    42  func c_procstats_linux() (opentsdb.MultiDataPoint, error) {
    43  	var md opentsdb.MultiDataPoint
    44  	var Error error
    45  	if err := readLine("/proc/uptime", func(s string) error {
    46  		m := uptimeRE.FindStringSubmatch(s)
    47  		if m == nil {
    48  			return nil
    49  		}
    50  		Add(&md, "linux.uptime_total", m[1], nil, metadata.Gauge, metadata.Second, osSystemUptimeDesc)
    51  		Add(&md, "linux.uptime_now", m[2], nil, metadata.Gauge, metadata.Second, "")
    52  		Add(&md, osSystemUptime, m[1], nil, metadata.Gauge, metadata.Second, osSystemUptimeDesc)
    53  		return nil
    54  	}); err != nil {
    55  		Error = err
    56  	}
    57  	mem := make(map[string]int64)
    58  	if err := readLine("/proc/meminfo", func(s string) error {
    59  		m := meminfoRE.FindStringSubmatch(s)
    60  		if m == nil {
    61  			return nil
    62  		}
    63  		i, err := strconv.ParseInt(m[2], 10, 64)
    64  		if err != nil {
    65  			return err
    66  		}
    67  		mem[m[1]] = i
    68  		Add(&md, "linux.mem."+strings.ToLower(m[1]), m[2], nil, metadata.Gauge, metadata.KBytes, "")
    69  		return nil
    70  	}); err != nil {
    71  		Error = err
    72  	}
    73  	bufferCacheSlab := mem["Buffers"] + mem["Cached"] + mem["Slab"]
    74  	memTotal := mem["MemTotal"]
    75  	memFree := mem["MemFree"]
    76  	// MemAvailable was introduced in the 3.14 kernel and is a more accurate measure of available memory
    77  	// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=34e431b0a
    78  	// We used this metric if it is available
    79  	available, availableIsAvailable := mem["MemAvailable"]
    80  	Add(&md, osMemTotal, memTotal*1024, nil, metadata.Gauge, metadata.Bytes, osMemTotalDesc)
    81  	freeValue := memFree + bufferCacheSlab
    82  	usedValue := memTotal - memFree - bufferCacheSlab
    83  	if availableIsAvailable {
    84  		freeValue = available
    85  		usedValue = memTotal - available
    86  	}
    87  	Add(&md, osMemFree, freeValue*1024, nil, metadata.Gauge, metadata.Bytes, osMemFreeDesc)
    88  	Add(&md, osMemUsed, usedValue*1024, nil, metadata.Gauge, metadata.Bytes, osMemUsedDesc)
    89  	if memTotal != 0 {
    90  		Add(&md, osMemPctFree, (float64(freeValue))/float64(memTotal)*100, nil, metadata.Gauge, metadata.Pct, osMemFreeDesc)
    91  	}
    92  
    93  	num_cores := 0
    94  	var t_util float64
    95  	cpu_stat_desc := map[string]string{
    96  		"user":       "Normal processes executing in user mode.",
    97  		"nice":       "Niced processes executing in user mode.",
    98  		"system":     "Processes executing in kernel mode.",
    99  		"idle":       "Twiddling thumbs.",
   100  		"iowait":     "Waiting for I/O to complete.",
   101  		"irq":        "Servicing interrupts.",
   102  		"softirq":    "Servicing soft irqs.",
   103  		"steal":      "Involuntary wait.",
   104  		"guest":      "Running a guest vm.",
   105  		"guest_nice": "Running a niced guest vm.",
   106  	}
   107  	if err := readLine("/proc/stat", func(s string) error {
   108  		m := statRE.FindStringSubmatch(s)
   109  		if m == nil {
   110  			return nil
   111  		}
   112  		if strings.HasPrefix(m[1], "cpu") {
   113  			metric_percpu := ""
   114  			tag_cpu := ""
   115  			cpu_m := statCPURE.FindStringSubmatch(m[1])
   116  			if cpu_m != nil {
   117  				num_cores += 1
   118  				metric_percpu = ".percpu"
   119  				tag_cpu = cpu_m[1]
   120  			}
   121  			fields := strings.Fields(m[2])
   122  			for i, value := range fields {
   123  				if i >= len(CPU_FIELDS) {
   124  					break
   125  				}
   126  				tags := opentsdb.TagSet{
   127  					"type": CPU_FIELDS[i],
   128  				}
   129  				if tag_cpu != "" {
   130  					tags["cpu"] = tag_cpu
   131  				}
   132  				Add(&md, "linux.cpu"+metric_percpu, value, tags, metadata.Counter, metadata.CHz, cpu_stat_desc[CPU_FIELDS[i]])
   133  			}
   134  			if metric_percpu == "" {
   135  				if len(fields) < 3 {
   136  					return nil
   137  				}
   138  				user, err := strconv.ParseFloat(fields[0], 64)
   139  				if err != nil {
   140  					return nil
   141  				}
   142  				nice, err := strconv.ParseFloat(fields[1], 64)
   143  				if err != nil {
   144  					return nil
   145  				}
   146  				system, err := strconv.ParseFloat(fields[2], 64)
   147  				if err != nil {
   148  					return nil
   149  				}
   150  				t_util = user + nice + system
   151  			}
   152  		} else if m[1] == "intr" {
   153  			Add(&md, "linux.intr", strings.Fields(m[2])[0], nil, metadata.Counter, metadata.Interupt, "")
   154  		} else if m[1] == "ctxt" {
   155  			Add(&md, "linux.ctxt", m[2], nil, metadata.Counter, metadata.ContextSwitch, "")
   156  		} else if m[1] == "processes" {
   157  			Add(&md, "linux.processes", m[2], nil, metadata.Counter, metadata.Process,
   158  				"The number  of processes and threads created, which includes (but  is not limited  to) those  created by  calls to the  fork() and clone() system calls.")
   159  		} else if m[1] == "procs_blocked" {
   160  			Add(&md, "linux.procs_blocked", m[2], nil, metadata.Gauge, metadata.Process, "The  number of  processes currently blocked, waiting for I/O to complete.")
   161  		}
   162  		return nil
   163  	}); err != nil {
   164  		Error = err
   165  	}
   166  	if num_cores != 0 && t_util != 0 {
   167  		Add(&md, osCPU, t_util/float64(num_cores), nil, metadata.Counter, metadata.Pct, "")
   168  	}
   169  	cpuinfo_index := 0
   170  	if err := readLine("/proc/cpuinfo", func(s string) error {
   171  		m := cpuspeedRE.FindStringSubmatch(s)
   172  		if m == nil {
   173  			return nil
   174  		}
   175  		tags := opentsdb.TagSet{"cpu": strconv.Itoa(cpuinfo_index)}
   176  		Add(&md, osCPUClock, m[1], tags, metadata.Gauge, metadata.MHz, osCPUClockDesc)
   177  		Add(&md, "linux.cpu.clock", m[1], tags, metadata.Gauge, metadata.MHz, osCPUClockDesc)
   178  		cpuinfo_index += 1
   179  		return nil
   180  	}); err != nil {
   181  		Error = err
   182  	}
   183  	if err := readLine("/proc/loadavg", func(s string) error {
   184  		m := loadavgRE.FindStringSubmatch(s)
   185  		if m == nil {
   186  			return nil
   187  		}
   188  		Add(&md, "linux.loadavg_1_min", m[1], nil, metadata.Gauge, metadata.Load, "")
   189  		Add(&md, "linux.loadavg_5_min", m[2], nil, metadata.Gauge, metadata.Load, "")
   190  		Add(&md, "linux.loadavg_15_min", m[3], nil, metadata.Gauge, metadata.Load, "")
   191  		Add(&md, "linux.loadavg_runnable", m[4], nil, metadata.Gauge, metadata.Process, "")
   192  		Add(&md, "linux.loadavg_total_threads", m[5], nil, metadata.Gauge, metadata.Process, "")
   193  		return nil
   194  	}); err != nil {
   195  		Error = err
   196  	}
   197  	if err := readLine("/proc/sys/kernel/random/entropy_avail", func(s string) error {
   198  		Add(&md, "linux.entropy_avail", strings.TrimSpace(s), nil, metadata.Gauge, metadata.Entropy, "The remaing amount of entropy available to the system. If it is low or hitting zero processes might be blocked waiting for extropy")
   199  		return nil
   200  	}); err != nil {
   201  		Error = err
   202  	}
   203  
   204  	if err := readLine("/proc/net/sockstat", func(s string) error {
   205  		cols := strings.Fields(s)
   206  		switch cols[0] {
   207  		case "sockets:":
   208  			if len(cols) < 3 {
   209  				return fmt.Errorf("sockstat: error parsing sockets line")
   210  			}
   211  			Add(&md, "linux.net.sockets.used", cols[2], nil, metadata.Gauge, metadata.Socket, "")
   212  		case "TCP:":
   213  			if len(cols) < 11 {
   214  				return fmt.Errorf("sockstat: error parsing tcp line")
   215  			}
   216  			Add(&md, "linux.net.sockets.tcp_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "")
   217  			Add(&md, "linux.net.sockets.tcp_orphaned", cols[4], nil, metadata.Gauge, metadata.Socket, "")
   218  			Add(&md, "linux.net.sockets.tcp_time_wait", cols[6], nil, metadata.Gauge, metadata.Socket, "")
   219  			Add(&md, "linux.net.sockets.tcp_allocated", cols[8], nil, metadata.Gauge, metadata.None, "")
   220  			Add(&md, "linux.net.sockets.tcp_mem", cols[10], nil, metadata.Gauge, metadata.None, "")
   221  		case "UDP:":
   222  			if len(cols) < 5 {
   223  				return fmt.Errorf("sockstat: error parsing udp line")
   224  			}
   225  			Add(&md, "linux.net.sockets.udp_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "")
   226  			Add(&md, "linux.net.sockets.udp_mem", cols[4], nil, metadata.Gauge, metadata.Page, "")
   227  		case "UDPLITE:":
   228  			if len(cols) < 3 {
   229  				return fmt.Errorf("sockstat: error parsing udplite line")
   230  			}
   231  			Add(&md, "linux.net.sockets.udplite_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "")
   232  		case "RAW:":
   233  			if len(cols) < 3 {
   234  				return fmt.Errorf("sockstat: error parsing raw line")
   235  			}
   236  			Add(&md, "linux.net.sockets.raw_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "")
   237  		case "FRAG:":
   238  			if len(cols) < 5 {
   239  				return fmt.Errorf("sockstat: error parsing frag line")
   240  			}
   241  			Add(&md, "linux.net.sockets.frag_in_use", cols[2], nil, metadata.Gauge, metadata.Socket, "")
   242  			Add(&md, "linux.net.sockets.frag_mem", cols[4], nil, metadata.Gauge, metadata.Bytes, "")
   243  		}
   244  		return nil
   245  	}); err != nil {
   246  		Error = err
   247  	}
   248  	ln := 0
   249  	var headers []string
   250  	if err := readLine("/proc/net/netstat", func(s string) error {
   251  		cols := strings.Fields(s)
   252  		if ln%2 == 0 {
   253  			headers = cols
   254  		} else {
   255  			if len(cols) < 1 || len(cols) != len(headers) {
   256  				return fmt.Errorf("netstat: parsing failed")
   257  			}
   258  			root := strings.ToLower(strings.TrimSuffix(headers[0], "Ext:"))
   259  			for i, v := range cols[1:] {
   260  				i++
   261  				m := "linux.net.stat." + root + "." + strings.TrimPrefix(strings.ToLower(headers[i]), "tcp")
   262  				Add(&md, m, v, nil, metadata.Counter, metadata.None, "")
   263  			}
   264  		}
   265  		ln += 1
   266  		return nil
   267  	}); err != nil {
   268  		Error = err
   269  	}
   270  	ln = 0
   271  	if err := readLine("/proc/net/snmp", func(s string) error {
   272  		ln++
   273  		if ln%2 != 0 {
   274  			f := strings.Fields(s)
   275  			if len(f) < 2 {
   276  				return fmt.Errorf("Failed to parse header line")
   277  			}
   278  			headers = f
   279  		} else {
   280  			values := strings.Fields(s)
   281  			if len(values) != len(headers) {
   282  				return fmt.Errorf("Mismatched header and value length")
   283  			}
   284  			proto := strings.ToLower(strings.TrimSuffix(values[0], ":"))
   285  			for i, v := range values {
   286  				if i == 0 {
   287  					continue
   288  				}
   289  				var stype metadata.RateType = metadata.Counter
   290  				stat := strings.ToLower(headers[i])
   291  				if strings.HasPrefix(stat, "rto") {
   292  					stype = metadata.Gauge
   293  				}
   294  				Add(&md, "linux.net.stat."+proto+"."+stat, v, nil, stype, metadata.None, "")
   295  			}
   296  		}
   297  		return nil
   298  	}); err != nil {
   299  		Error = err
   300  	}
   301  	if err := readLine("/proc/sys/fs/file-nr", func(s string) error {
   302  		f := strings.Fields(s)
   303  		if len(f) != 3 {
   304  			return fmt.Errorf("unexpected number of fields")
   305  		}
   306  		v, err := strconv.ParseInt(f[0], 10, 64)
   307  		if err != nil {
   308  			return err
   309  		}
   310  		Add(&md, "linux.fs.open", v, nil, metadata.Gauge, metadata.Count, "The number of files presently open.")
   311  		return nil
   312  	}); err != nil {
   313  		Error = err
   314  	}
   315  	return md, Error
   316  }
   317  
   318  func c_interrupts_linux() (opentsdb.MultiDataPoint, error) {
   319  	var md opentsdb.MultiDataPoint
   320  	irq_type_desc := map[string]string{
   321  		"NMI": "Non-maskable interrupts.",
   322  		"LOC": "Local timer interrupts.",
   323  		"SPU": "Spurious interrupts.",
   324  		"PMI": "Performance monitoring interrupts.",
   325  		"IWI": "IRQ work interrupts.",
   326  		"RES": "Rescheduling interrupts.",
   327  		"CAL": "Funcation call interupts.",
   328  		"TLB": "TLB (translation lookaside buffer) shootdowns.",
   329  		"TRM": "Thermal event interrupts.",
   330  		"THR": "Threshold APIC interrupts.",
   331  		"MCE": "Machine check exceptions.",
   332  		"MCP": "Machine Check polls.",
   333  	}
   334  	num_cpus := 0
   335  	if err := readLine("/proc/interrupts", func(s string) error {
   336  		cols := strings.Fields(s)
   337  		if num_cpus == 0 {
   338  			num_cpus = len(cols)
   339  			return nil
   340  		} else if len(cols) < 2 {
   341  			return nil
   342  		}
   343  		irq_type := strings.TrimRight(cols[0], ":")
   344  		if !IsAlNum(irq_type) {
   345  			return nil
   346  		}
   347  		if IsDigit(irq_type) {
   348  			if cols[len(cols)-2] == "PCI-MSI-edge" && strings.Contains(cols[len(cols)-1], "eth") {
   349  				irq_type = cols[len(cols)-1]
   350  			} else {
   351  				// Interrupt type is just a number, ignore.
   352  				return nil
   353  			}
   354  		}
   355  		for i, val := range cols[1:] {
   356  			if i >= num_cpus || !IsDigit(val) {
   357  				// All values read, remaining cols contain textual description.
   358  				break
   359  			}
   360  			Add(&md, "linux.interrupts", val, opentsdb.TagSet{"type": irq_type, "cpu": strconv.Itoa(i)}, metadata.Counter, metadata.Interupt, irq_type_desc[irq_type])
   361  		}
   362  		return nil
   363  	}); err != nil {
   364  		return nil, err
   365  	}
   366  	return md, nil
   367  }
   368  
   369  func c_vmstat_linux() (opentsdb.MultiDataPoint, error) {
   370  	var md opentsdb.MultiDataPoint
   371  	if err := readLine("/proc/vmstat", func(s string) error {
   372  		m := vmstatRE.FindStringSubmatch(s)
   373  		if m == nil {
   374  			return nil
   375  		}
   376  		switch m[1] {
   377  		case "pgpgin", "pgpgout", "pswpin", "pswpout", "pgfault", "pgmajfault":
   378  			mio := inoutRE.FindStringSubmatch(m[1])
   379  			if mio != nil {
   380  				Add(&md, "linux.mem."+mio[1], m[2], opentsdb.TagSet{"direction": mio[2]}, metadata.Counter, metadata.Page, "")
   381  			} else {
   382  				Add(&md, "linux.mem."+m[1], m[2], nil, metadata.Counter, metadata.Page, "")
   383  			}
   384  		default:
   385  			Add(&md, "linux.mem."+m[1], m[2], nil, metadata.Counter, metadata.None, "")
   386  		}
   387  		return nil
   388  	}); err != nil {
   389  		return nil, err
   390  	}
   391  	return md, nil
   392  }