bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/scollector/collectors/disk_linux.go (about) 1 package collectors 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "os" 7 "path/filepath" 8 "regexp" 9 "strconv" 10 "strings" 11 "time" 12 13 "bosun.org/metadata" 14 "bosun.org/opentsdb" 15 "bosun.org/slog" 16 "bosun.org/util" 17 ) 18 19 func init() { 20 collectors = append(collectors, &IntervalCollector{F: c_iostat_linux}) 21 collectors = append(collectors, &IntervalCollector{F: c_dfstat_blocks_linux, Interval: time.Second * 30}) 22 collectors = append(collectors, &IntervalCollector{F: c_dfstat_inodes_linux, Interval: time.Second * 30}) 23 collectors = append(collectors, &IntervalCollector{F: checkMdadmLinux, Interval: 1 * time.Minute}) 24 } 25 26 var diskLinuxFields = []struct { 27 key string 28 rate metadata.RateType 29 unit metadata.Unit 30 desc string 31 }{ 32 {"read_requests", metadata.Counter, metadata.Count, "Total number of reads completed successfully."}, 33 {"read_merged", metadata.Counter, metadata.Count, "Adjacent read requests merged in a single req."}, 34 {"read_sectors", metadata.Counter, metadata.Count, "Total number of sectors read successfully."}, 35 {"msec_read", metadata.Counter, metadata.MilliSecond, "Total number of ms spent by all reads."}, 36 {"write_requests", metadata.Counter, metadata.Count, "Total number of writes completed successfully."}, 37 {"write_merged", metadata.Counter, metadata.Count, " Adjacent write requests merged in a single req."}, 38 {"write_sectors", metadata.Counter, metadata.Count, "Total number of sectors written successfully."}, 39 {"msec_write", metadata.Counter, metadata.MilliSecond, "Total number of ms spent by all writes."}, 40 {"ios_in_progress", metadata.Gauge, metadata.Operation, "Number of actual I/O requests currently in flight."}, 41 {"msec_total", metadata.Counter, metadata.MilliSecond, "Amount of time during which ios_in_progress >= 1."}, 42 {"msec_weighted_total", metadata.Gauge, metadata.MilliSecond, "Measure of recent I/O completion time and backlog."}, 43 } 44 45 var diskLinuxFieldsPart = []struct { 46 key string 47 rate metadata.RateType 48 unit metadata.Unit 49 }{ 50 {"read_issued", metadata.Counter, metadata.Count}, 51 {"read_sectors", metadata.Counter, metadata.Count}, 52 {"write_issued", metadata.Counter, metadata.Count}, 53 {"write_sectors", metadata.Counter, metadata.Count}, 54 } 55 56 func removable(major, minor string) bool { 57 //We don't return an error, because removable may not exist for partitions of a removable device 58 //So this is really "best effort" and we will have to see how it works in practice. 59 b, err := ioutil.ReadFile("/sys/dev/block/" + major + ":" + minor + "/removable") 60 if err != nil { 61 return false 62 } 63 return strings.Trim(string(b), "\n") == "1" 64 } 65 66 var sdiskRE = regexp.MustCompile(`/dev/(sd[a-z])[0-9]?`) 67 68 func removable_fs(name string) bool { 69 s := sdiskRE.FindStringSubmatch(name) 70 if len(s) > 1 { 71 b, err := ioutil.ReadFile("/sys/block/" + s[1] + "/removable") 72 if err != nil { 73 return false 74 } 75 return strings.Trim(string(b), "\n") == "1" 76 } 77 return false 78 } 79 80 func isPseudoFS(name string) (res bool) { 81 err := readLine("/proc/filesystems", func(s string) error { 82 ss := strings.Split(s, "\t") 83 if len(ss) == 2 && ss[1] == name && ss[0] == "nodev" { 84 res = true 85 } 86 return nil 87 }) 88 if err != nil { 89 slog.Errorf("can not read '/proc/filesystems': %v", err) 90 } 91 return 92 } 93 94 func c_iostat_linux() (opentsdb.MultiDataPoint, error) { 95 var md opentsdb.MultiDataPoint 96 var removables []string 97 err := readLine("/proc/diskstats", func(s string) error { 98 values := strings.Fields(s) 99 if len(values) < 4 { 100 return nil 101 } else if values[3] == "0" { 102 // Skip disks that haven't done a single read. 103 return nil 104 } 105 metric := "linux.disk.part." 106 i0, _ := strconv.Atoi(values[0]) 107 i1, _ := strconv.Atoi(values[1]) 108 var block_size int64 109 device := values[2] 110 ts := opentsdb.TagSet{"dev": device} 111 if i1%16 == 0 && i0 > 1 { 112 metric = "linux.disk." 113 if b, err := ioutil.ReadFile("/sys/block/" + device + "/queue/hw_sector_size"); err == nil { 114 block_size, _ = strconv.ParseInt(strings.TrimSpace(string(b)), 10, 64) 115 } 116 } 117 if removable(values[0], values[1]) { 118 removables = append(removables, device) 119 } 120 for _, r := range removables { 121 if strings.HasPrefix(device, r) { 122 metric += "rem." 123 } 124 } 125 if len(values) == 14 { 126 var read_sectors, msec_read, write_sectors, msec_write float64 127 for i, v := range values[3:] { 128 switch diskLinuxFields[i].key { 129 case "read_sectors": 130 read_sectors, _ = strconv.ParseFloat(v, 64) 131 case "msec_read": 132 msec_read, _ = strconv.ParseFloat(v, 64) 133 case "write_sectors": 134 write_sectors, _ = strconv.ParseFloat(v, 64) 135 case "msec_write": 136 msec_write, _ = strconv.ParseFloat(v, 64) 137 } 138 Add(&md, metric+diskLinuxFields[i].key, v, ts, diskLinuxFields[i].rate, diskLinuxFields[i].unit, diskLinuxFields[i].desc) 139 } 140 if read_sectors != 0 && msec_read != 0 { 141 Add(&md, metric+"time_per_read", read_sectors/msec_read, ts, metadata.Rate, metadata.MilliSecond, "") 142 } 143 if write_sectors != 0 && msec_write != 0 { 144 Add(&md, metric+"time_per_write", write_sectors/msec_write, ts, metadata.Rate, metadata.MilliSecond, "") 145 } 146 if block_size != 0 { 147 Add(&md, metric+"bytes", int64(write_sectors)*block_size, opentsdb.TagSet{"type": "write"}.Merge(ts), metadata.Counter, metadata.Bytes, "Total number of bytes written to disk.") 148 Add(&md, metric+"bytes", int64(read_sectors)*block_size, opentsdb.TagSet{"type": "read"}.Merge(ts), metadata.Counter, metadata.Bytes, "Total number of bytes read to disk.") 149 Add(&md, metric+"block_size", block_size, ts, metadata.Gauge, metadata.Bytes, "Sector size of the block device.") 150 } 151 } else if len(values) == 7 { 152 for i, v := range values[3:] { 153 Add(&md, metric+diskLinuxFieldsPart[i].key, v, ts, diskLinuxFieldsPart[i].rate, diskLinuxFieldsPart[i].unit, "") 154 } 155 } else { 156 return fmt.Errorf("cannot parse") 157 } 158 return nil 159 }) 160 return md, err 161 } 162 163 func examineMdadmVolume(volumeName string) (volumeDetail, error) { 164 // command to get mdadm status 165 tmout := 2 * time.Second 166 // We don't use --test because it has failed us in the past. 167 // Maybe we should use it sometime in the future 168 output, err := util.Command(tmout, nil, "mdadm", "--detail", volumeName) 169 if err != nil { 170 return volumeDetail{}, err 171 } 172 detail := parseExamineMdadm(output) 173 return detail, err 174 } 175 176 // keep only fileNames that are devices 177 func filterVolumes(volumes []string) []string { 178 out := make([]string, 0, len(volumes)) 179 for _, vol := range volumes { 180 finfo, err := os.Stat(vol) 181 if err != nil { // if we can't stat, we won't monitor 182 continue 183 } 184 if finfo.Mode()&os.ModeDevice != 0 { 185 out = append(out, vol) 186 } 187 } 188 return out 189 } 190 191 func checkMdadmLinux() (opentsdb.MultiDataPoint, error) { 192 var md opentsdb.MultiDataPoint 193 194 volumes, err := filepath.Glob("/dev/md*") 195 if err != nil { 196 return md, err 197 } 198 for _, volume := range filterVolumes(volumes) { 199 detail, err := examineMdadmVolume(volume) 200 if err != nil { 201 slog.Errorf("mdadm: can't parse %s data, %s", volume, err) 202 continue 203 } 204 addMdadmMetric(&md, volume, detail) 205 } 206 return md, nil 207 } 208 209 func c_dfstat_blocks_linux() (opentsdb.MultiDataPoint, error) { 210 var md opentsdb.MultiDataPoint 211 err := util.ReadCommand(func(line string) error { 212 fields := strings.Fields(line) 213 // TODO: support mount points with spaces in them. They mess up the field order 214 // currently due to df's columnar output. 215 if len(fields) != 7 || !IsDigit(fields[2]) { 216 return nil 217 } 218 // /dev/mapper/vg0-usr ext4 13384816 9996920 2815784 79% /usr 219 fs := fields[0] 220 fsType := fields[1] 221 spaceTotal := fields[2] 222 spaceUsed := fields[3] 223 spaceFree := fields[4] 224 mount := fields[6] 225 if isPseudoFS(fsType) { 226 return nil 227 } 228 tags := opentsdb.TagSet{"mount": mount} 229 os_tags := opentsdb.TagSet{"disk": mount} 230 metric := "linux.disk.fs." 231 ometric := "os.disk.fs." 232 if removable_fs(fs) { 233 metric += "rem." 234 ometric += "rem." 235 } 236 Add(&md, metric+"space_total", spaceTotal, tags, metadata.Gauge, metadata.Bytes, osDiskTotalDesc) 237 Add(&md, metric+"space_used", spaceUsed, tags, metadata.Gauge, metadata.Bytes, osDiskUsedDesc) 238 Add(&md, metric+"space_free", spaceFree, tags, metadata.Gauge, metadata.Bytes, osDiskFreeDesc) 239 Add(&md, ometric+"space_total", spaceTotal, os_tags, metadata.Gauge, metadata.Bytes, osDiskTotalDesc) 240 Add(&md, ometric+"space_used", spaceUsed, os_tags, metadata.Gauge, metadata.Bytes, osDiskUsedDesc) 241 Add(&md, ometric+"space_free", spaceFree, os_tags, metadata.Gauge, metadata.Bytes, osDiskFreeDesc) 242 st, _ := strconv.ParseFloat(spaceTotal, 64) 243 sf, _ := strconv.ParseFloat(spaceFree, 64) 244 if st != 0 { 245 Add(&md, osDiskPctFree, sf/st*100, os_tags, metadata.Gauge, metadata.Pct, osDiskPctFreeDesc) 246 } 247 return nil 248 }, "df", "-lPT", "--block-size", "1") 249 return md, err 250 } 251 252 func c_dfstat_inodes_linux() (opentsdb.MultiDataPoint, error) { 253 var md opentsdb.MultiDataPoint 254 err := util.ReadCommand(func(line string) error { 255 fields := strings.Fields(line) 256 if len(fields) != 7 || !IsDigit(fields[2]) { 257 return nil 258 } 259 // /dev/mapper/vg0-usr ext4 851968 468711 383257 56% /usr 260 fs := fields[0] 261 fsType := fields[1] 262 inodesTotal := fields[2] 263 inodesUsed := fields[3] 264 inodesFree := fields[4] 265 mount := fields[6] 266 if isPseudoFS(fsType) { 267 return nil 268 } 269 tags := opentsdb.TagSet{"mount": mount} 270 metric := "linux.disk.fs." 271 if removable_fs(fs) { 272 metric += "rem." 273 } 274 Add(&md, metric+"inodes_total", inodesTotal, tags, metadata.Gauge, metadata.Count, "") 275 Add(&md, metric+"inodes_used", inodesUsed, tags, metadata.Gauge, metadata.Count, "") 276 Add(&md, metric+"inodes_free", inodesFree, tags, metadata.Gauge, metadata.Count, "") 277 return nil 278 }, "df", "-liPT") 279 return md, err 280 }