package collectors

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"time"

	"bosun.org/metadata"
	"bosun.org/opentsdb"
	"bosun.org/slog"
	"bosun.org/util"
)

func init() {
	collectors = append(collectors, &IntervalCollector{F: c_iostat_linux})
	collectors = append(collectors, &IntervalCollector{F: c_dfstat_blocks_linux, Interval: time.Second * 30})
	collectors = append(collectors, &IntervalCollector{F: c_dfstat_inodes_linux, Interval: time.Second * 30})
	collectors = append(collectors, &IntervalCollector{F: checkMdadmLinux, Interval: 1 * time.Minute})
}

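// diskLinuxFields describes, in order, the eleven statistics that follow the
// major number, minor number, and device name on a full /proc/diskstats line
// (see the kernel's iostats documentation). An illustrative line looks like:
//
//	8 0 sda 104578 23415 4043974 68260 229672 207718 9873304 473360 0 141668 541616
//
// The values above are made up; only the column layout matters here.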
var diskLinuxFields = []struct {
	key  string
	rate metadata.RateType
	unit metadata.Unit
	desc string
}{
	{"read_requests", metadata.Counter, metadata.Count, "Total number of reads completed successfully."},
	{"read_merged", metadata.Counter, metadata.Count, "Adjacent read requests merged in a single req."},
	{"read_sectors", metadata.Counter, metadata.Count, "Total number of sectors read successfully."},
	{"msec_read", metadata.Counter, metadata.MilliSecond, "Total number of ms spent by all reads."},
	{"write_requests", metadata.Counter, metadata.Count, "Total number of writes completed successfully."},
	{"write_merged", metadata.Counter, metadata.Count, "Adjacent write requests merged in a single req."},
	{"write_sectors", metadata.Counter, metadata.Count, "Total number of sectors written successfully."},
	{"msec_write", metadata.Counter, metadata.MilliSecond, "Total number of ms spent by all writes."},
	{"ios_in_progress", metadata.Gauge, metadata.Operation, "Number of actual I/O requests currently in flight."},
	{"msec_total", metadata.Counter, metadata.MilliSecond, "Amount of time during which ios_in_progress >= 1."},
	{"msec_weighted_total", metadata.Gauge, metadata.MilliSecond, "Measure of recent I/O completion time and backlog."},
}

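// diskLinuxFieldsPart describes the four statistics reported on the short,
// seven-column /proc/diskstats lines (typically partitions on older kernels).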
var diskLinuxFieldsPart = []struct {
	key  string
	rate metadata.RateType
	unit metadata.Unit
}{
	{"read_issued", metadata.Counter, metadata.Count},
	{"read_sectors", metadata.Counter, metadata.Count},
	{"write_issued", metadata.Counter, metadata.Count},
	{"write_sectors", metadata.Counter, metadata.Count},
}

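// removable reports whether the block device with the given major:minor
// numbers is flagged as removable in sysfs.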
func removable(major, minor string) bool {
	// We don't return an error because the removable flag may not exist for
	// partitions of a removable device, so this is really "best effort" and we
	// will have to see how it works in practice.
	b, err := ioutil.ReadFile("/sys/dev/block/" + major + ":" + minor + "/removable")
	if err != nil {
		return false
	}
	return strings.Trim(string(b), "\n") == "1"
}

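// sdiskRE matches /dev/sdX device paths, capturing the disk name without any
// trailing partition number.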
var sdiskRE = regexp.MustCompile(`/dev/(sd[a-z])[0-9]?`)

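// removable_fs reports whether the filesystem's backing device (e.g. /dev/sda1)
// is flagged as removable in sysfs.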
func removable_fs(name string) bool {
	s := sdiskRE.FindStringSubmatch(name)
	if len(s) > 1 {
		b, err := ioutil.ReadFile("/sys/block/" + s[1] + "/removable")
		if err != nil {
			return false
		}
		return strings.Trim(string(b), "\n") == "1"
	}
	return false
}

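// isPseudoFS reports whether the named filesystem type is listed as "nodev"
// (i.e. not backed by a block device) in /proc/filesystems.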
func isPseudoFS(name string) (res bool) {
	err := readLine("/proc/filesystems", func(s string) error {
		ss := strings.Split(s, "\t")
		if len(ss) == 2 && ss[1] == name && ss[0] == "nodev" {
			res = true
		}
		return nil
	})
	if err != nil {
		slog.Errorf("cannot read '/proc/filesystems': %v", err)
	}
	return
}

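// c_iostat_linux collects disk and partition I/O statistics from /proc/diskstats.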
func c_iostat_linux() (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	var removables []string
	err := readLine("/proc/diskstats", func(s string) error {
		values := strings.Fields(s)
		if len(values) < 4 {
			return nil
		} else if values[3] == "0" {
			// Skip disks that haven't done a single read.
			return nil
		}
		metric := "linux.disk.part."
		i0, _ := strconv.Atoi(values[0])
		i1, _ := strconv.Atoi(values[1])
		var block_size int64
		device := values[2]
		ts := opentsdb.TagSet{"dev": device}
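		// Heuristic: whole disks (as opposed to partitions) typically have a minor
		// number that is a multiple of 16; majors 0 and 1 (e.g. ramdisks) are excluded.
		// Whole disks are reported under linux.disk.* and get their sector size read.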
		if i1%16 == 0 && i0 > 1 {
			metric = "linux.disk."
			if b, err := ioutil.ReadFile("/sys/block/" + device + "/queue/hw_sector_size"); err == nil {
				block_size, _ = strconv.ParseInt(strings.TrimSpace(string(b)), 10, 64)
			}
		}
		if removable(values[0], values[1]) {
			removables = append(removables, device)
		}
		for _, r := range removables {
			if strings.HasPrefix(device, r) {
				metric += "rem."
			}
		}
		if len(values) == 14 {
			var read_sectors, msec_read, write_sectors, msec_write float64
			for i, v := range values[3:] {
				switch diskLinuxFields[i].key {
				case "read_sectors":
					read_sectors, _ = strconv.ParseFloat(v, 64)
				case "msec_read":
					msec_read, _ = strconv.ParseFloat(v, 64)
				case "write_sectors":
					write_sectors, _ = strconv.ParseFloat(v, 64)
				case "msec_write":
					msec_write, _ = strconv.ParseFloat(v, 64)
				}
				Add(&md, metric+diskLinuxFields[i].key, v, ts, diskLinuxFields[i].rate, diskLinuxFields[i].unit, diskLinuxFields[i].desc)
			}
			if read_sectors != 0 && msec_read != 0 {
				Add(&md, metric+"time_per_read", read_sectors/msec_read, ts, metadata.Rate, metadata.MilliSecond, "")
			}
			if write_sectors != 0 && msec_write != 0 {
				Add(&md, metric+"time_per_write", write_sectors/msec_write, ts, metadata.Rate, metadata.MilliSecond, "")
			}
			if block_size != 0 {
				Add(&md, metric+"bytes", int64(write_sectors)*block_size, opentsdb.TagSet{"type": "write"}.Merge(ts), metadata.Counter, metadata.Bytes, "Total number of bytes written to disk.")
				Add(&md, metric+"bytes", int64(read_sectors)*block_size, opentsdb.TagSet{"type": "read"}.Merge(ts), metadata.Counter, metadata.Bytes, "Total number of bytes read from disk.")
				Add(&md, metric+"block_size", block_size, ts, metadata.Gauge, metadata.Bytes, "Sector size of the block device.")
			}
		} else if len(values) == 7 {
			for i, v := range values[3:] {
				Add(&md, metric+diskLinuxFieldsPart[i].key, v, ts, diskLinuxFieldsPart[i].rate, diskLinuxFieldsPart[i].unit, "")
			}
		} else {
			return fmt.Errorf("cannot parse /proc/diskstats line: %q", s)
		}
		return nil
	})
	return md, err
}

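// examineMdadmVolume runs `mdadm --detail` against the given volume and parses
// its output into a volumeDetail.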
func examineMdadmVolume(volumeName string) (volumeDetail, error) {
	// Run `mdadm --detail` with a short timeout to get the volume's status.
	tmout := 2 * time.Second
	// We don't use --test because it has failed us in the past.
	// Maybe we should use it again sometime in the future.
	output, err := util.Command(tmout, nil, "mdadm", "--detail", volumeName)
	if err != nil {
		return volumeDetail{}, err
	}
	detail := parseExamineMdadm(output)
	return detail, err
}

// filterVolumes keeps only the file names that refer to devices.
func filterVolumes(volumes []string) []string {
	out := make([]string, 0, len(volumes))
	for _, vol := range volumes {
		finfo, err := os.Stat(vol)
		if err != nil { // if we can't stat it, we don't monitor it
			continue
		}
		if finfo.Mode()&os.ModeDevice != 0 {
			out = append(out, vol)
		}
	}
	return out
}

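// checkMdadmLinux reports the status of any /dev/md* software RAID volumes
// found on the host.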
func checkMdadmLinux() (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint

	volumes, err := filepath.Glob("/dev/md*")
	if err != nil {
		return md, err
	}
	for _, volume := range filterVolumes(volumes) {
		detail, err := examineMdadmVolume(volume)
		if err != nil {
			slog.Errorf("mdadm: cannot parse %s details: %v", volume, err)
			continue
		}
		addMdadmMetric(&md, volume, detail)
	}
	return md, nil
}

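// c_dfstat_blocks_linux reports space usage for each local, non-pseudo
// filesystem using `df -lPT --block-size 1`.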
func c_dfstat_blocks_linux() (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	err := util.ReadCommand(func(line string) error {
		fields := strings.Fields(line)
		// TODO: support mount points with spaces in them. They currently break
		// the field order because of df's columnar output.
		if len(fields) != 7 || !IsDigit(fields[2]) {
			return nil
		}
		// /dev/mapper/vg0-usr ext4 13384816 9996920 2815784 79% /usr
		fs := fields[0]
		fsType := fields[1]
		spaceTotal := fields[2]
		spaceUsed := fields[3]
		spaceFree := fields[4]
		mount := fields[6]
		if isPseudoFS(fsType) {
			return nil
		}
		tags := opentsdb.TagSet{"mount": mount}
		os_tags := opentsdb.TagSet{"disk": mount}
		metric := "linux.disk.fs."
		ometric := "os.disk.fs."
		if removable_fs(fs) {
			metric += "rem."
			ometric += "rem."
		}
		Add(&md, metric+"space_total", spaceTotal, tags, metadata.Gauge, metadata.Bytes, osDiskTotalDesc)
		Add(&md, metric+"space_used", spaceUsed, tags, metadata.Gauge, metadata.Bytes, osDiskUsedDesc)
		Add(&md, metric+"space_free", spaceFree, tags, metadata.Gauge, metadata.Bytes, osDiskFreeDesc)
		Add(&md, ometric+"space_total", spaceTotal, os_tags, metadata.Gauge, metadata.Bytes, osDiskTotalDesc)
		Add(&md, ometric+"space_used", spaceUsed, os_tags, metadata.Gauge, metadata.Bytes, osDiskUsedDesc)
		Add(&md, ometric+"space_free", spaceFree, os_tags, metadata.Gauge, metadata.Bytes, osDiskFreeDesc)
		st, _ := strconv.ParseFloat(spaceTotal, 64)
		sf, _ := strconv.ParseFloat(spaceFree, 64)
		if st != 0 {
			Add(&md, osDiskPctFree, sf/st*100, os_tags, metadata.Gauge, metadata.Pct, osDiskPctFreeDesc)
		}
		return nil
	}, "df", "-lPT", "--block-size", "1")
	return md, err
}

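// c_dfstat_inodes_linux reports inode usage for each local, non-pseudo
// filesystem using `df -liPT`.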
func c_dfstat_inodes_linux() (opentsdb.MultiDataPoint, error) {
	var md opentsdb.MultiDataPoint
	err := util.ReadCommand(func(line string) error {
		fields := strings.Fields(line)
		if len(fields) != 7 || !IsDigit(fields[2]) {
			return nil
		}
		// /dev/mapper/vg0-usr ext4 851968 468711 383257 56% /usr
		fs := fields[0]
		fsType := fields[1]
		inodesTotal := fields[2]
		inodesUsed := fields[3]
		inodesFree := fields[4]
		mount := fields[6]
		if isPseudoFS(fsType) {
			return nil
		}
		tags := opentsdb.TagSet{"mount": mount}
		metric := "linux.disk.fs."
		if removable_fs(fs) {
			metric += "rem."
		}
		Add(&md, metric+"inodes_total", inodesTotal, tags, metadata.Gauge, metadata.Count, "")
		Add(&md, metric+"inodes_used", inodesUsed, tags, metadata.Gauge, metadata.Count, "")
		Add(&md, metric+"inodes_free", inodesFree, tags, metadata.Gauge, metadata.Count, "")
		return nil
	}, "df", "-liPT")
	return md, err
}