github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/status/runtime.go (about) 1 // Copyright 2015 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package status 12 13 import ( 14 "context" 15 "fmt" 16 "os" 17 "runtime" 18 "runtime/debug" 19 "time" 20 21 "github.com/cockroachdb/cockroach/pkg/build" 22 "github.com/cockroachdb/cockroach/pkg/util/hlc" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/cockroach/pkg/util/metric" 25 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 26 "github.com/dustin/go-humanize" 27 "github.com/elastic/gosigar" 28 "github.com/shirou/gopsutil/net" 29 ) 30 31 var ( 32 metaCgoCalls = metric.Metadata{ 33 Name: "sys.cgocalls", 34 Help: "Total number of cgo calls", 35 Measurement: "cgo Calls", 36 Unit: metric.Unit_COUNT, 37 } 38 metaGoroutines = metric.Metadata{ 39 Name: "sys.goroutines", 40 Help: "Current number of goroutines", 41 Measurement: "goroutines", 42 Unit: metric.Unit_COUNT, 43 } 44 metaGoAllocBytes = metric.Metadata{ 45 Name: "sys.go.allocbytes", 46 Help: "Current bytes of memory allocated by go", 47 Measurement: "Memory", 48 Unit: metric.Unit_BYTES, 49 } 50 metaGoTotalBytes = metric.Metadata{ 51 Name: "sys.go.totalbytes", 52 Help: "Total bytes of memory allocated by go, but not released", 53 Measurement: "Memory", 54 Unit: metric.Unit_BYTES, 55 } 56 metaCgoAllocBytes = metric.Metadata{ 57 Name: "sys.cgo.allocbytes", 58 Help: "Current bytes of memory allocated by cgo", 59 Measurement: "Memory", 60 Unit: metric.Unit_BYTES, 61 } 62 metaCgoTotalBytes = metric.Metadata{ 63 Name: "sys.cgo.totalbytes", 64 Help: "Total bytes of memory allocated by cgo, but not released", 65 Measurement: "Memory", 66 Unit: metric.Unit_BYTES, 67 } 68 metaGCCount = metric.Metadata{ 69 Name: "sys.gc.count", 70 Help: "Total number of GC runs", 71 Measurement: "GC Runs", 72 Unit: metric.Unit_COUNT, 73 } 74 metaGCPauseNS = metric.Metadata{ 75 Name: "sys.gc.pause.ns", 76 Help: "Total GC pause", 77 Measurement: "GC Pause", 78 Unit: metric.Unit_NANOSECONDS, 79 } 80 metaGCPausePercent = metric.Metadata{ 81 Name: "sys.gc.pause.percent", 82 Help: "Current GC pause percentage", 83 Measurement: "GC Pause", 84 Unit: metric.Unit_PERCENT, 85 } 86 metaCPUUserNS = metric.Metadata{ 87 Name: "sys.cpu.user.ns", 88 Help: "Total user cpu time", 89 Measurement: "CPU Time", 90 Unit: metric.Unit_NANOSECONDS, 91 } 92 metaCPUUserPercent = metric.Metadata{ 93 Name: "sys.cpu.user.percent", 94 Help: "Current user cpu percentage", 95 Measurement: "CPU Time", 96 Unit: metric.Unit_PERCENT, 97 } 98 metaCPUSysNS = metric.Metadata{ 99 Name: "sys.cpu.sys.ns", 100 Help: "Total system cpu time", 101 Measurement: "CPU Time", 102 Unit: metric.Unit_NANOSECONDS, 103 } 104 metaCPUSysPercent = metric.Metadata{ 105 Name: "sys.cpu.sys.percent", 106 Help: "Current system cpu percentage", 107 Measurement: "CPU Time", 108 Unit: metric.Unit_PERCENT, 109 } 110 metaCPUCombinedPercentNorm = metric.Metadata{ 111 Name: "sys.cpu.combined.percent-normalized", 112 Help: "Current user+system cpu percentage, normalized 0-1 by number of cores", 113 Measurement: "CPU Time", 114 Unit: metric.Unit_PERCENT, 115 } 116 metaRSSBytes = metric.Metadata{ 117 Name: "sys.rss", 118 Help: "Current process RSS", 119 Measurement: "RSS", 120 Unit: metric.Unit_BYTES, 121 } 122 metaFDOpen = metric.Metadata{ 123 Name: "sys.fd.open", 124 Help: "Process open file descriptors", 125 Measurement: "File Descriptors", 126 Unit: metric.Unit_COUNT, 127 } 128 metaFDSoftLimit = metric.Metadata{ 129 Name: "sys.fd.softlimit", 130 Help: "Process open FD soft limit", 131 Measurement: "File Descriptors", 132 Unit: metric.Unit_COUNT, 133 } 134 metaUptime = metric.Metadata{ 135 Name: "sys.uptime", 136 Help: "Process uptime", 137 Measurement: "Uptime", 138 Unit: metric.Unit_SECONDS, 139 } 140 141 // These disk and network stats are counters of the number of operations, packets, bytes, and 142 // cumulative time of the disk and net IO that has been done across the whole host *since this 143 // Cockroach process started up*. By taking the derivatives of these metrics, we can see the 144 // IO throughput. 145 metaHostDiskReadCount = metric.Metadata{ 146 Name: "sys.host.disk.read.count", 147 Unit: metric.Unit_COUNT, 148 Measurement: "Operations", 149 Help: "Disk read operations across all disks since this process started", 150 } 151 metaHostDiskReadBytes = metric.Metadata{ 152 Name: "sys.host.disk.read.bytes", 153 Unit: metric.Unit_BYTES, 154 Measurement: "Bytes", 155 Help: "Bytes read from all disks since this process started", 156 } 157 metaHostDiskReadTime = metric.Metadata{ 158 Name: "sys.host.disk.read.time", 159 Unit: metric.Unit_NANOSECONDS, 160 Measurement: "Time", 161 Help: "Time spent reading from all disks since this process started", 162 } 163 metaHostDiskWriteCount = metric.Metadata{ 164 Name: "sys.host.disk.write.count", 165 Unit: metric.Unit_COUNT, 166 Measurement: "Operations", 167 Help: "Disk write operations across all disks since this process started", 168 } 169 metaHostDiskWriteBytes = metric.Metadata{ 170 Name: "sys.host.disk.write.bytes", 171 Unit: metric.Unit_BYTES, 172 Measurement: "Bytes", 173 Help: "Bytes written to all disks since this process started", 174 } 175 metaHostDiskWriteTime = metric.Metadata{ 176 Name: "sys.host.disk.write.time", 177 Unit: metric.Unit_NANOSECONDS, 178 Measurement: "Time", 179 Help: "Time spent writing to all disks since this process started", 180 } 181 metaHostDiskIOTime = metric.Metadata{ 182 Name: "sys.host.disk.io.time", 183 Unit: metric.Unit_NANOSECONDS, 184 Measurement: "Time", 185 Help: "Time spent reading from or writing to all disks since this process started", 186 } 187 metaHostDiskWeightedIOTime = metric.Metadata{ 188 Name: "sys.host.disk.weightedio.time", 189 Unit: metric.Unit_NANOSECONDS, 190 Measurement: "Time", 191 Help: "Weighted time spent reading from or writing to to all disks since this process started", 192 } 193 metaHostIopsInProgress = metric.Metadata{ 194 Name: "sys.host.disk.iopsinprogress", 195 Unit: metric.Unit_COUNT, 196 Measurement: "Operations", 197 Help: "IO operations currently in progress on this host", 198 } 199 metaHostNetRecvBytes = metric.Metadata{ 200 Name: "sys.host.net.recv.bytes", 201 Unit: metric.Unit_BYTES, 202 Measurement: "Bytes", 203 Help: "Bytes received on all network interfaces since this process started", 204 } 205 metaHostNetRecvPackets = metric.Metadata{ 206 Name: "sys.host.net.recv.packets", 207 Unit: metric.Unit_COUNT, 208 Measurement: "Packets", 209 Help: "Packets received on all network interfaces since this process started", 210 } 211 metaHostNetSendBytes = metric.Metadata{ 212 Name: "sys.host.net.send.bytes", 213 Unit: metric.Unit_BYTES, 214 Measurement: "Bytes", 215 Help: "Bytes sent on all network interfaces since this process started", 216 } 217 metaHostNetSendPackets = metric.Metadata{ 218 Name: "sys.host.net.send.packets", 219 Unit: metric.Unit_COUNT, 220 Measurement: "Packets", 221 Help: "Packets sent on all network interfaces since this process started", 222 } 223 ) 224 225 // getCgoMemStats is a function that fetches stats for the C++ portion of the code. 226 // We will not necessarily have implementations for all builds, so check for nil first. 227 // Returns the following: 228 // allocated uint: bytes allocated by application 229 // total uint: total bytes requested from system 230 // error : any issues fetching stats. This should be a warning only. 231 var getCgoMemStats func(context.Context) (uint, uint, error) 232 233 // RuntimeStatSampler is used to periodically sample the runtime environment 234 // for useful statistics, performing some rudimentary calculations and storing 235 // the resulting information in a format that can be easily consumed by status 236 // logging systems. 237 type RuntimeStatSampler struct { 238 clock *hlc.Clock 239 240 startTimeNanos int64 241 // The last sampled values of some statistics are kept only to compute 242 // derivative statistics. 243 last struct { 244 now int64 245 utime int64 246 stime int64 247 cgoCall int64 248 gcCount int64 249 gcPauseTime uint64 250 disk diskStats 251 net net.IOCountersStat 252 } 253 254 initialDiskCounters diskStats 255 initialNetCounters net.IOCountersStat 256 257 // Only show "not implemented" errors once, we don't need the log spam. 258 fdUsageNotImplemented bool 259 260 // Metric gauges maintained by the sampler. 261 // Go runtime stats. 262 CgoCalls *metric.Gauge 263 Goroutines *metric.Gauge 264 GoAllocBytes *metric.Gauge 265 GoTotalBytes *metric.Gauge 266 CgoAllocBytes *metric.Gauge 267 CgoTotalBytes *metric.Gauge 268 GcCount *metric.Gauge 269 GcPauseNS *metric.Gauge 270 GcPausePercent *metric.GaugeFloat64 271 // CPU stats. 272 CPUUserNS *metric.Gauge 273 CPUUserPercent *metric.GaugeFloat64 274 CPUSysNS *metric.Gauge 275 CPUSysPercent *metric.GaugeFloat64 276 CPUCombinedPercentNorm *metric.GaugeFloat64 277 // Memory stats. 278 RSSBytes *metric.Gauge 279 // File descriptor stats. 280 FDOpen *metric.Gauge 281 FDSoftLimit *metric.Gauge 282 // Disk and network stats. 283 HostDiskReadBytes *metric.Gauge 284 HostDiskReadCount *metric.Gauge 285 HostDiskReadTime *metric.Gauge 286 HostDiskWriteBytes *metric.Gauge 287 HostDiskWriteCount *metric.Gauge 288 HostDiskWriteTime *metric.Gauge 289 HostDiskIOTime *metric.Gauge 290 HostDiskWeightedIOTime *metric.Gauge 291 IopsInProgress *metric.Gauge 292 HostNetRecvBytes *metric.Gauge 293 HostNetRecvPackets *metric.Gauge 294 HostNetSendBytes *metric.Gauge 295 HostNetSendPackets *metric.Gauge 296 // Uptime and build. 297 Uptime *metric.Gauge // We use a gauge to be able to call Update. 298 BuildTimestamp *metric.Gauge 299 } 300 301 // NewRuntimeStatSampler constructs a new RuntimeStatSampler object. 302 func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSampler { 303 // Construct the build info metric. It is constant. 304 // We first build set the labels on the metadata. 305 info := build.GetInfo() 306 timestamp, err := info.Timestamp() 307 if err != nil { 308 // We can't panic here, tests don't have a build timestamp. 309 log.Warningf(ctx, "Could not parse build timestamp: %v", err) 310 } 311 312 // Build information. 313 metaBuildTimestamp := metric.Metadata{ 314 Name: "build.timestamp", 315 Help: "Build information", 316 Measurement: "Build Time", 317 Unit: metric.Unit_TIMESTAMP_SEC, 318 } 319 metaBuildTimestamp.AddLabel("tag", info.Tag) 320 metaBuildTimestamp.AddLabel("go_version", info.GoVersion) 321 322 buildTimestamp := metric.NewGauge(metaBuildTimestamp) 323 buildTimestamp.Update(timestamp) 324 325 diskCounters, err := getSummedDiskCounters(ctx) 326 if err != nil { 327 log.Errorf(ctx, "could not get initial disk IO counters: %v", err) 328 } 329 netCounters, err := getSummedNetStats(ctx) 330 if err != nil { 331 log.Errorf(ctx, "could not get initial disk IO counters: %v", err) 332 } 333 334 rsr := &RuntimeStatSampler{ 335 clock: clock, 336 startTimeNanos: clock.PhysicalNow(), 337 initialNetCounters: netCounters, 338 initialDiskCounters: diskCounters, 339 CgoCalls: metric.NewGauge(metaCgoCalls), 340 Goroutines: metric.NewGauge(metaGoroutines), 341 GoAllocBytes: metric.NewGauge(metaGoAllocBytes), 342 GoTotalBytes: metric.NewGauge(metaGoTotalBytes), 343 CgoAllocBytes: metric.NewGauge(metaCgoAllocBytes), 344 CgoTotalBytes: metric.NewGauge(metaCgoTotalBytes), 345 GcCount: metric.NewGauge(metaGCCount), 346 GcPauseNS: metric.NewGauge(metaGCPauseNS), 347 GcPausePercent: metric.NewGaugeFloat64(metaGCPausePercent), 348 CPUUserNS: metric.NewGauge(metaCPUUserNS), 349 CPUUserPercent: metric.NewGaugeFloat64(metaCPUUserPercent), 350 CPUSysNS: metric.NewGauge(metaCPUSysNS), 351 CPUSysPercent: metric.NewGaugeFloat64(metaCPUSysPercent), 352 CPUCombinedPercentNorm: metric.NewGaugeFloat64(metaCPUCombinedPercentNorm), 353 RSSBytes: metric.NewGauge(metaRSSBytes), 354 HostDiskReadBytes: metric.NewGauge(metaHostDiskReadBytes), 355 HostDiskReadCount: metric.NewGauge(metaHostDiskReadCount), 356 HostDiskReadTime: metric.NewGauge(metaHostDiskReadTime), 357 HostDiskWriteBytes: metric.NewGauge(metaHostDiskWriteBytes), 358 HostDiskWriteCount: metric.NewGauge(metaHostDiskWriteCount), 359 HostDiskWriteTime: metric.NewGauge(metaHostDiskWriteTime), 360 HostDiskIOTime: metric.NewGauge(metaHostDiskIOTime), 361 HostDiskWeightedIOTime: metric.NewGauge(metaHostDiskWeightedIOTime), 362 IopsInProgress: metric.NewGauge(metaHostIopsInProgress), 363 HostNetRecvBytes: metric.NewGauge(metaHostNetRecvBytes), 364 HostNetRecvPackets: metric.NewGauge(metaHostNetRecvPackets), 365 HostNetSendBytes: metric.NewGauge(metaHostNetSendBytes), 366 HostNetSendPackets: metric.NewGauge(metaHostNetSendPackets), 367 FDOpen: metric.NewGauge(metaFDOpen), 368 FDSoftLimit: metric.NewGauge(metaFDSoftLimit), 369 Uptime: metric.NewGauge(metaUptime), 370 BuildTimestamp: buildTimestamp, 371 } 372 rsr.last.disk = rsr.initialDiskCounters 373 rsr.last.net = rsr.initialNetCounters 374 return rsr 375 } 376 377 // GoMemStats groups a runtime.MemStats structure with the timestamp when it 378 // was collected. 379 type GoMemStats struct { 380 runtime.MemStats 381 // Collected is the timestamp at which these values were collected. 382 Collected time.Time 383 } 384 385 // SampleEnvironment queries the runtime system for various interesting metrics, 386 // storing the resulting values in the set of metric gauges maintained by 387 // RuntimeStatSampler. This makes runtime statistics more convenient for 388 // consumption by the time series and status systems. 389 // 390 // This method should be called periodically by a higher level system in order 391 // to keep runtime statistics current. 392 // 393 // SampleEnvironment takes GoMemStats as input because that is collected 394 // separately, on a different schedule. 395 func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, ms GoMemStats) { 396 // Note that debug.ReadGCStats() does not suffer the same problem as 397 // runtime.ReadMemStats(). The only way you can know that is by reading the 398 // source. 399 gc := &debug.GCStats{} 400 debug.ReadGCStats(gc) 401 402 numCgoCall := runtime.NumCgoCall() 403 numGoroutine := runtime.NumGoroutine() 404 405 // Retrieve Mem and CPU statistics. 406 pid := os.Getpid() 407 mem := gosigar.ProcMem{} 408 if err := mem.Get(pid); err != nil { 409 log.Errorf(ctx, "unable to get mem usage: %v", err) 410 } 411 cpuTime := gosigar.ProcTime{} 412 if err := cpuTime.Get(pid); err != nil { 413 log.Errorf(ctx, "unable to get cpu usage: %v", err) 414 } 415 416 fds := gosigar.ProcFDUsage{} 417 if err := fds.Get(pid); err != nil { 418 if gosigar.IsNotImplemented(err) { 419 if !rsr.fdUsageNotImplemented { 420 rsr.fdUsageNotImplemented = true 421 log.Warningf(ctx, "unable to get file descriptor usage (will not try again): %s", err) 422 } 423 } else { 424 log.Errorf(ctx, "unable to get file descriptor usage: %s", err) 425 } 426 } 427 428 var deltaDisk diskStats 429 diskCounters, err := getSummedDiskCounters(ctx) 430 if err != nil { 431 log.Warningf(ctx, "problem fetching disk stats: %s; disk stats will be empty.", err) 432 } else { 433 deltaDisk = diskCounters 434 subtractDiskCounters(&deltaDisk, rsr.last.disk) 435 rsr.last.disk = diskCounters 436 subtractDiskCounters(&diskCounters, rsr.initialDiskCounters) 437 438 rsr.HostDiskReadBytes.Update(diskCounters.readBytes) 439 rsr.HostDiskReadCount.Update(diskCounters.readCount) 440 rsr.HostDiskReadTime.Update(int64(diskCounters.readTime)) 441 rsr.HostDiskWriteBytes.Update(diskCounters.writeBytes) 442 rsr.HostDiskWriteCount.Update(diskCounters.writeCount) 443 rsr.HostDiskWriteTime.Update(int64(diskCounters.writeTime)) 444 rsr.HostDiskIOTime.Update(int64(diskCounters.ioTime)) 445 rsr.HostDiskWeightedIOTime.Update(int64(diskCounters.weightedIOTime)) 446 rsr.IopsInProgress.Update(diskCounters.iopsInProgress) 447 } 448 449 var deltaNet net.IOCountersStat 450 netCounters, err := getSummedNetStats(ctx) 451 if err != nil { 452 log.Warningf(ctx, "problem fetching net stats: %s; net stats will be empty.", err) 453 } else { 454 deltaNet = netCounters 455 subtractNetworkCounters(&deltaNet, rsr.last.net) 456 rsr.last.net = netCounters 457 subtractNetworkCounters(&netCounters, rsr.initialNetCounters) 458 459 rsr.HostNetSendBytes.Update(int64(netCounters.BytesSent)) 460 rsr.HostNetSendPackets.Update(int64(netCounters.PacketsSent)) 461 rsr.HostNetRecvBytes.Update(int64(netCounters.BytesRecv)) 462 rsr.HostNetRecvPackets.Update(int64(netCounters.PacketsRecv)) 463 } 464 465 // Time statistics can be compared to the total elapsed time to create a 466 // useful percentage of total CPU usage, which would be somewhat less accurate 467 // if calculated later using downsampled time series data. 468 now := rsr.clock.PhysicalNow() 469 dur := float64(now - rsr.last.now) 470 // cpuTime.{User,Sys} are in milliseconds, convert to nanoseconds. 471 utime := int64(cpuTime.User) * 1e6 472 stime := int64(cpuTime.Sys) * 1e6 473 uPerc := float64(utime-rsr.last.utime) / dur 474 sPerc := float64(stime-rsr.last.stime) / dur 475 combinedNormalizedPerc := (sPerc + uPerc) / float64(runtime.NumCPU()) 476 gcPausePercent := float64(uint64(gc.PauseTotal)-rsr.last.gcPauseTime) / dur 477 rsr.last.now = now 478 rsr.last.utime = utime 479 rsr.last.stime = stime 480 rsr.last.gcPauseTime = uint64(gc.PauseTotal) 481 482 var cgoAllocated, cgoTotal uint 483 if getCgoMemStats != nil { 484 var err error 485 cgoAllocated, cgoTotal, err = getCgoMemStats(ctx) 486 if err != nil { 487 log.Warningf(ctx, "problem fetching CGO memory stats: %s; CGO stats will be empty.", err) 488 } 489 } 490 491 // Log summary of statistics to console. 492 cgoRate := float64((numCgoCall-rsr.last.cgoCall)*int64(time.Second)) / dur 493 goMemStatsStale := timeutil.Now().Sub(ms.Collected) > time.Second 494 var staleMsg = "" 495 if goMemStatsStale { 496 staleMsg = "(stale)" 497 } 498 goTotal := ms.Sys - ms.HeapReleased 499 log.Infof(ctx, "%s", log.Safe(fmt.Sprintf("runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total%s, "+ 500 "%s/%s CGO alloc/total, %.1f CGO/sec, %.1f/%.1f %%(u/s)time, %.1f %%gc (%dx), "+ 501 "%s/%s (r/w)net", 502 humanize.IBytes(mem.Resident), numGoroutine, 503 humanize.IBytes(ms.HeapAlloc), humanize.IBytes(ms.HeapIdle), humanize.IBytes(goTotal), 504 staleMsg, 505 humanize.IBytes(uint64(cgoAllocated)), humanize.IBytes(uint64(cgoTotal)), 506 cgoRate, 100*uPerc, 100*sPerc, 100*gcPausePercent, gc.NumGC-rsr.last.gcCount, 507 humanize.IBytes(deltaNet.BytesRecv), humanize.IBytes(deltaNet.BytesSent), 508 ))) 509 rsr.last.cgoCall = numCgoCall 510 rsr.last.gcCount = gc.NumGC 511 512 rsr.GoAllocBytes.Update(int64(ms.HeapAlloc)) 513 rsr.GoTotalBytes.Update(int64(goTotal)) 514 rsr.CgoCalls.Update(numCgoCall) 515 rsr.Goroutines.Update(int64(numGoroutine)) 516 rsr.CgoAllocBytes.Update(int64(cgoAllocated)) 517 rsr.CgoTotalBytes.Update(int64(cgoTotal)) 518 rsr.GcCount.Update(gc.NumGC) 519 rsr.GcPauseNS.Update(int64(gc.PauseTotal)) 520 rsr.GcPausePercent.Update(gcPausePercent) 521 rsr.CPUUserNS.Update(utime) 522 rsr.CPUUserPercent.Update(uPerc) 523 rsr.CPUSysNS.Update(stime) 524 rsr.CPUSysPercent.Update(sPerc) 525 rsr.CPUCombinedPercentNorm.Update(combinedNormalizedPerc) 526 rsr.FDOpen.Update(int64(fds.Open)) 527 rsr.FDSoftLimit.Update(int64(fds.SoftLimit)) 528 rsr.RSSBytes.Update(int64(mem.Resident)) 529 rsr.Uptime.Update((now - rsr.startTimeNanos) / 1e9) 530 } 531 532 // GetCPUCombinedPercentNorm is part of the rowexec.RuntimeStats interface. 533 func (rsr *RuntimeStatSampler) GetCPUCombinedPercentNorm() float64 { 534 return rsr.CPUCombinedPercentNorm.Value() 535 } 536 537 // diskStats contains the disk statistics returned by the operating 538 // system. Interpretation of some of these stats varies by platform, 539 // although as much as possible they are normalized to the semantics 540 // used by linux's diskstats interface. 541 // 542 // Except for iopsInProgress, these metrics act like counters (always 543 // increasing, and best interpreted as a rate). 544 type diskStats struct { 545 readBytes int64 546 readCount int64 547 548 // readTime (and writeTime) may increase more than 1s per second if 549 // access to storage is parallelized. 550 readTime time.Duration 551 552 writeBytes int64 553 writeCount int64 554 writeTime time.Duration 555 556 // ioTime is the amount of time that iopsInProgress is non-zero (so 557 // its increase is capped at 1s/s). Only available on linux. 558 ioTime time.Duration 559 560 // weightedIOTime is a linux-specific metric that attempts to 561 // represent "an easy measure of both I/O completion time and the 562 // backlog that may be accumulating." 563 weightedIOTime time.Duration 564 565 // iopsInProgress is a gauge of the number of pending IO operations. 566 // Not available on macOS. 567 iopsInProgress int64 568 } 569 570 func getSummedDiskCounters(ctx context.Context) (diskStats, error) { 571 diskCounters, err := getDiskCounters(ctx) 572 if err != nil { 573 return diskStats{}, err 574 } 575 576 return sumDiskCounters(diskCounters), nil 577 } 578 579 func getSummedNetStats(ctx context.Context) (net.IOCountersStat, error) { 580 netCounters, err := net.IOCountersWithContext(ctx, true /* per NIC */) 581 if err != nil { 582 return net.IOCountersStat{}, err 583 } 584 585 return sumNetworkCounters(netCounters), nil 586 } 587 588 // sumDiskCounters returns a new disk.IOCountersStat whose values are the sum of the 589 // values in the slice of disk.IOCountersStats passed in. 590 func sumDiskCounters(disksStats []diskStats) diskStats { 591 output := diskStats{} 592 for _, stats := range disksStats { 593 output.readBytes += stats.readBytes 594 output.readCount += stats.readCount 595 output.readTime += stats.readTime 596 597 output.writeBytes += stats.writeBytes 598 output.writeCount += stats.writeCount 599 output.writeTime += stats.writeTime 600 601 output.ioTime += stats.ioTime 602 output.weightedIOTime += stats.weightedIOTime 603 604 output.iopsInProgress += stats.iopsInProgress 605 } 606 return output 607 } 608 609 // subtractDiskCounters subtracts the counters in `sub` from the counters in `from`, 610 // saving the results in `from`. 611 func subtractDiskCounters(from *diskStats, sub diskStats) { 612 from.writeCount -= sub.writeCount 613 from.writeBytes -= sub.writeBytes 614 from.writeTime -= sub.writeTime 615 616 from.readCount -= sub.readCount 617 from.readBytes -= sub.readBytes 618 from.readTime -= sub.readTime 619 620 from.ioTime -= sub.ioTime 621 from.weightedIOTime -= sub.weightedIOTime 622 } 623 624 // sumNetworkCounters returns a new net.IOCountersStat whose values are the sum of the 625 // values in the slice of net.IOCountersStats passed in. 626 func sumNetworkCounters(netCounters []net.IOCountersStat) net.IOCountersStat { 627 output := net.IOCountersStat{} 628 for _, counter := range netCounters { 629 output.BytesRecv += counter.BytesRecv 630 output.BytesSent += counter.BytesSent 631 output.PacketsRecv += counter.PacketsRecv 632 output.PacketsSent += counter.PacketsSent 633 } 634 return output 635 } 636 637 // subtractNetworkCounters subtracts the counters in `sub` from the counters in `from`, 638 // saving the results in `from`. 639 func subtractNetworkCounters(from *net.IOCountersStat, sub net.IOCountersStat) { 640 from.BytesRecv -= sub.BytesRecv 641 from.BytesSent -= sub.BytesSent 642 from.PacketsRecv -= sub.PacketsRecv 643 from.PacketsSent -= sub.PacketsSent 644 }