github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/status/runtime.go (about)

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package status
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"os"
    17  	"runtime"
    18  	"runtime/debug"
    19  	"time"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/build"
    22  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    25  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    26  	"github.com/dustin/go-humanize"
    27  	"github.com/elastic/gosigar"
    28  	"github.com/shirou/gopsutil/net"
    29  )
    30  
    31  var (
    32  	metaCgoCalls = metric.Metadata{
    33  		Name:        "sys.cgocalls",
    34  		Help:        "Total number of cgo calls",
    35  		Measurement: "cgo Calls",
    36  		Unit:        metric.Unit_COUNT,
    37  	}
    38  	metaGoroutines = metric.Metadata{
    39  		Name:        "sys.goroutines",
    40  		Help:        "Current number of goroutines",
    41  		Measurement: "goroutines",
    42  		Unit:        metric.Unit_COUNT,
    43  	}
    44  	metaGoAllocBytes = metric.Metadata{
    45  		Name:        "sys.go.allocbytes",
    46  		Help:        "Current bytes of memory allocated by go",
    47  		Measurement: "Memory",
    48  		Unit:        metric.Unit_BYTES,
    49  	}
    50  	metaGoTotalBytes = metric.Metadata{
    51  		Name:        "sys.go.totalbytes",
    52  		Help:        "Total bytes of memory allocated by go, but not released",
    53  		Measurement: "Memory",
    54  		Unit:        metric.Unit_BYTES,
    55  	}
    56  	metaCgoAllocBytes = metric.Metadata{
    57  		Name:        "sys.cgo.allocbytes",
    58  		Help:        "Current bytes of memory allocated by cgo",
    59  		Measurement: "Memory",
    60  		Unit:        metric.Unit_BYTES,
    61  	}
    62  	metaCgoTotalBytes = metric.Metadata{
    63  		Name:        "sys.cgo.totalbytes",
    64  		Help:        "Total bytes of memory allocated by cgo, but not released",
    65  		Measurement: "Memory",
    66  		Unit:        metric.Unit_BYTES,
    67  	}
    68  	metaGCCount = metric.Metadata{
    69  		Name:        "sys.gc.count",
    70  		Help:        "Total number of GC runs",
    71  		Measurement: "GC Runs",
    72  		Unit:        metric.Unit_COUNT,
    73  	}
    74  	metaGCPauseNS = metric.Metadata{
    75  		Name:        "sys.gc.pause.ns",
    76  		Help:        "Total GC pause",
    77  		Measurement: "GC Pause",
    78  		Unit:        metric.Unit_NANOSECONDS,
    79  	}
    80  	metaGCPausePercent = metric.Metadata{
    81  		Name:        "sys.gc.pause.percent",
    82  		Help:        "Current GC pause percentage",
    83  		Measurement: "GC Pause",
    84  		Unit:        metric.Unit_PERCENT,
    85  	}
    86  	metaCPUUserNS = metric.Metadata{
    87  		Name:        "sys.cpu.user.ns",
    88  		Help:        "Total user cpu time",
    89  		Measurement: "CPU Time",
    90  		Unit:        metric.Unit_NANOSECONDS,
    91  	}
    92  	metaCPUUserPercent = metric.Metadata{
    93  		Name:        "sys.cpu.user.percent",
    94  		Help:        "Current user cpu percentage",
    95  		Measurement: "CPU Time",
    96  		Unit:        metric.Unit_PERCENT,
    97  	}
    98  	metaCPUSysNS = metric.Metadata{
    99  		Name:        "sys.cpu.sys.ns",
   100  		Help:        "Total system cpu time",
   101  		Measurement: "CPU Time",
   102  		Unit:        metric.Unit_NANOSECONDS,
   103  	}
   104  	metaCPUSysPercent = metric.Metadata{
   105  		Name:        "sys.cpu.sys.percent",
   106  		Help:        "Current system cpu percentage",
   107  		Measurement: "CPU Time",
   108  		Unit:        metric.Unit_PERCENT,
   109  	}
   110  	metaCPUCombinedPercentNorm = metric.Metadata{
   111  		Name:        "sys.cpu.combined.percent-normalized",
   112  		Help:        "Current user+system cpu percentage, normalized 0-1 by number of cores",
   113  		Measurement: "CPU Time",
   114  		Unit:        metric.Unit_PERCENT,
   115  	}
   116  	metaRSSBytes = metric.Metadata{
   117  		Name:        "sys.rss",
   118  		Help:        "Current process RSS",
   119  		Measurement: "RSS",
   120  		Unit:        metric.Unit_BYTES,
   121  	}
   122  	metaFDOpen = metric.Metadata{
   123  		Name:        "sys.fd.open",
   124  		Help:        "Process open file descriptors",
   125  		Measurement: "File Descriptors",
   126  		Unit:        metric.Unit_COUNT,
   127  	}
   128  	metaFDSoftLimit = metric.Metadata{
   129  		Name:        "sys.fd.softlimit",
   130  		Help:        "Process open FD soft limit",
   131  		Measurement: "File Descriptors",
   132  		Unit:        metric.Unit_COUNT,
   133  	}
   134  	metaUptime = metric.Metadata{
   135  		Name:        "sys.uptime",
   136  		Help:        "Process uptime",
   137  		Measurement: "Uptime",
   138  		Unit:        metric.Unit_SECONDS,
   139  	}
   140  
   141  	// These disk and network stats are counters of the number of operations, packets, bytes, and
   142  	// cumulative time of the disk and net IO that has been done across the whole host *since this
   143  	// Cockroach process started up*. By taking the derivatives of these metrics, we can see the
   144  	// IO throughput.
   145  	metaHostDiskReadCount = metric.Metadata{
   146  		Name:        "sys.host.disk.read.count",
   147  		Unit:        metric.Unit_COUNT,
   148  		Measurement: "Operations",
   149  		Help:        "Disk read operations across all disks since this process started",
   150  	}
   151  	metaHostDiskReadBytes = metric.Metadata{
   152  		Name:        "sys.host.disk.read.bytes",
   153  		Unit:        metric.Unit_BYTES,
   154  		Measurement: "Bytes",
   155  		Help:        "Bytes read from all disks since this process started",
   156  	}
   157  	metaHostDiskReadTime = metric.Metadata{
   158  		Name:        "sys.host.disk.read.time",
   159  		Unit:        metric.Unit_NANOSECONDS,
   160  		Measurement: "Time",
   161  		Help:        "Time spent reading from all disks since this process started",
   162  	}
   163  	metaHostDiskWriteCount = metric.Metadata{
   164  		Name:        "sys.host.disk.write.count",
   165  		Unit:        metric.Unit_COUNT,
   166  		Measurement: "Operations",
   167  		Help:        "Disk write operations across all disks since this process started",
   168  	}
   169  	metaHostDiskWriteBytes = metric.Metadata{
   170  		Name:        "sys.host.disk.write.bytes",
   171  		Unit:        metric.Unit_BYTES,
   172  		Measurement: "Bytes",
   173  		Help:        "Bytes written to all disks since this process started",
   174  	}
   175  	metaHostDiskWriteTime = metric.Metadata{
   176  		Name:        "sys.host.disk.write.time",
   177  		Unit:        metric.Unit_NANOSECONDS,
   178  		Measurement: "Time",
   179  		Help:        "Time spent writing to all disks since this process started",
   180  	}
   181  	metaHostDiskIOTime = metric.Metadata{
   182  		Name:        "sys.host.disk.io.time",
   183  		Unit:        metric.Unit_NANOSECONDS,
   184  		Measurement: "Time",
   185  		Help:        "Time spent reading from or writing to all disks since this process started",
   186  	}
   187  	metaHostDiskWeightedIOTime = metric.Metadata{
   188  		Name:        "sys.host.disk.weightedio.time",
   189  		Unit:        metric.Unit_NANOSECONDS,
   190  		Measurement: "Time",
   191  		Help:        "Weighted time spent reading from or writing to to all disks since this process started",
   192  	}
   193  	metaHostIopsInProgress = metric.Metadata{
   194  		Name:        "sys.host.disk.iopsinprogress",
   195  		Unit:        metric.Unit_COUNT,
   196  		Measurement: "Operations",
   197  		Help:        "IO operations currently in progress on this host",
   198  	}
   199  	metaHostNetRecvBytes = metric.Metadata{
   200  		Name:        "sys.host.net.recv.bytes",
   201  		Unit:        metric.Unit_BYTES,
   202  		Measurement: "Bytes",
   203  		Help:        "Bytes received on all network interfaces since this process started",
   204  	}
   205  	metaHostNetRecvPackets = metric.Metadata{
   206  		Name:        "sys.host.net.recv.packets",
   207  		Unit:        metric.Unit_COUNT,
   208  		Measurement: "Packets",
   209  		Help:        "Packets received on all network interfaces since this process started",
   210  	}
   211  	metaHostNetSendBytes = metric.Metadata{
   212  		Name:        "sys.host.net.send.bytes",
   213  		Unit:        metric.Unit_BYTES,
   214  		Measurement: "Bytes",
   215  		Help:        "Bytes sent on all network interfaces since this process started",
   216  	}
   217  	metaHostNetSendPackets = metric.Metadata{
   218  		Name:        "sys.host.net.send.packets",
   219  		Unit:        metric.Unit_COUNT,
   220  		Measurement: "Packets",
   221  		Help:        "Packets sent on all network interfaces since this process started",
   222  	}
   223  )
   224  
   225  // getCgoMemStats is a function that fetches stats for the C++ portion of the code.
   226  // We will not necessarily have implementations for all builds, so check for nil first.
   227  // Returns the following:
   228  // allocated uint: bytes allocated by application
   229  // total     uint: total bytes requested from system
   230  // error           : any issues fetching stats. This should be a warning only.
   231  var getCgoMemStats func(context.Context) (uint, uint, error)
   232  
   233  // RuntimeStatSampler is used to periodically sample the runtime environment
   234  // for useful statistics, performing some rudimentary calculations and storing
   235  // the resulting information in a format that can be easily consumed by status
   236  // logging systems.
   237  type RuntimeStatSampler struct {
   238  	clock *hlc.Clock
   239  
   240  	startTimeNanos int64
   241  	// The last sampled values of some statistics are kept only to compute
   242  	// derivative statistics.
   243  	last struct {
   244  		now         int64
   245  		utime       int64
   246  		stime       int64
   247  		cgoCall     int64
   248  		gcCount     int64
   249  		gcPauseTime uint64
   250  		disk        diskStats
   251  		net         net.IOCountersStat
   252  	}
   253  
   254  	initialDiskCounters diskStats
   255  	initialNetCounters  net.IOCountersStat
   256  
   257  	// Only show "not implemented" errors once, we don't need the log spam.
   258  	fdUsageNotImplemented bool
   259  
   260  	// Metric gauges maintained by the sampler.
   261  	// Go runtime stats.
   262  	CgoCalls       *metric.Gauge
   263  	Goroutines     *metric.Gauge
   264  	GoAllocBytes   *metric.Gauge
   265  	GoTotalBytes   *metric.Gauge
   266  	CgoAllocBytes  *metric.Gauge
   267  	CgoTotalBytes  *metric.Gauge
   268  	GcCount        *metric.Gauge
   269  	GcPauseNS      *metric.Gauge
   270  	GcPausePercent *metric.GaugeFloat64
   271  	// CPU stats.
   272  	CPUUserNS              *metric.Gauge
   273  	CPUUserPercent         *metric.GaugeFloat64
   274  	CPUSysNS               *metric.Gauge
   275  	CPUSysPercent          *metric.GaugeFloat64
   276  	CPUCombinedPercentNorm *metric.GaugeFloat64
   277  	// Memory stats.
   278  	RSSBytes *metric.Gauge
   279  	// File descriptor stats.
   280  	FDOpen      *metric.Gauge
   281  	FDSoftLimit *metric.Gauge
   282  	// Disk and network stats.
   283  	HostDiskReadBytes      *metric.Gauge
   284  	HostDiskReadCount      *metric.Gauge
   285  	HostDiskReadTime       *metric.Gauge
   286  	HostDiskWriteBytes     *metric.Gauge
   287  	HostDiskWriteCount     *metric.Gauge
   288  	HostDiskWriteTime      *metric.Gauge
   289  	HostDiskIOTime         *metric.Gauge
   290  	HostDiskWeightedIOTime *metric.Gauge
   291  	IopsInProgress         *metric.Gauge
   292  	HostNetRecvBytes       *metric.Gauge
   293  	HostNetRecvPackets     *metric.Gauge
   294  	HostNetSendBytes       *metric.Gauge
   295  	HostNetSendPackets     *metric.Gauge
   296  	// Uptime and build.
   297  	Uptime         *metric.Gauge // We use a gauge to be able to call Update.
   298  	BuildTimestamp *metric.Gauge
   299  }
   300  
   301  // NewRuntimeStatSampler constructs a new RuntimeStatSampler object.
   302  func NewRuntimeStatSampler(ctx context.Context, clock *hlc.Clock) *RuntimeStatSampler {
   303  	// Construct the build info metric. It is constant.
   304  	// We first build set the labels on the metadata.
   305  	info := build.GetInfo()
   306  	timestamp, err := info.Timestamp()
   307  	if err != nil {
   308  		// We can't panic here, tests don't have a build timestamp.
   309  		log.Warningf(ctx, "Could not parse build timestamp: %v", err)
   310  	}
   311  
   312  	// Build information.
   313  	metaBuildTimestamp := metric.Metadata{
   314  		Name:        "build.timestamp",
   315  		Help:        "Build information",
   316  		Measurement: "Build Time",
   317  		Unit:        metric.Unit_TIMESTAMP_SEC,
   318  	}
   319  	metaBuildTimestamp.AddLabel("tag", info.Tag)
   320  	metaBuildTimestamp.AddLabel("go_version", info.GoVersion)
   321  
   322  	buildTimestamp := metric.NewGauge(metaBuildTimestamp)
   323  	buildTimestamp.Update(timestamp)
   324  
   325  	diskCounters, err := getSummedDiskCounters(ctx)
   326  	if err != nil {
   327  		log.Errorf(ctx, "could not get initial disk IO counters: %v", err)
   328  	}
   329  	netCounters, err := getSummedNetStats(ctx)
   330  	if err != nil {
   331  		log.Errorf(ctx, "could not get initial disk IO counters: %v", err)
   332  	}
   333  
   334  	rsr := &RuntimeStatSampler{
   335  		clock:                  clock,
   336  		startTimeNanos:         clock.PhysicalNow(),
   337  		initialNetCounters:     netCounters,
   338  		initialDiskCounters:    diskCounters,
   339  		CgoCalls:               metric.NewGauge(metaCgoCalls),
   340  		Goroutines:             metric.NewGauge(metaGoroutines),
   341  		GoAllocBytes:           metric.NewGauge(metaGoAllocBytes),
   342  		GoTotalBytes:           metric.NewGauge(metaGoTotalBytes),
   343  		CgoAllocBytes:          metric.NewGauge(metaCgoAllocBytes),
   344  		CgoTotalBytes:          metric.NewGauge(metaCgoTotalBytes),
   345  		GcCount:                metric.NewGauge(metaGCCount),
   346  		GcPauseNS:              metric.NewGauge(metaGCPauseNS),
   347  		GcPausePercent:         metric.NewGaugeFloat64(metaGCPausePercent),
   348  		CPUUserNS:              metric.NewGauge(metaCPUUserNS),
   349  		CPUUserPercent:         metric.NewGaugeFloat64(metaCPUUserPercent),
   350  		CPUSysNS:               metric.NewGauge(metaCPUSysNS),
   351  		CPUSysPercent:          metric.NewGaugeFloat64(metaCPUSysPercent),
   352  		CPUCombinedPercentNorm: metric.NewGaugeFloat64(metaCPUCombinedPercentNorm),
   353  		RSSBytes:               metric.NewGauge(metaRSSBytes),
   354  		HostDiskReadBytes:      metric.NewGauge(metaHostDiskReadBytes),
   355  		HostDiskReadCount:      metric.NewGauge(metaHostDiskReadCount),
   356  		HostDiskReadTime:       metric.NewGauge(metaHostDiskReadTime),
   357  		HostDiskWriteBytes:     metric.NewGauge(metaHostDiskWriteBytes),
   358  		HostDiskWriteCount:     metric.NewGauge(metaHostDiskWriteCount),
   359  		HostDiskWriteTime:      metric.NewGauge(metaHostDiskWriteTime),
   360  		HostDiskIOTime:         metric.NewGauge(metaHostDiskIOTime),
   361  		HostDiskWeightedIOTime: metric.NewGauge(metaHostDiskWeightedIOTime),
   362  		IopsInProgress:         metric.NewGauge(metaHostIopsInProgress),
   363  		HostNetRecvBytes:       metric.NewGauge(metaHostNetRecvBytes),
   364  		HostNetRecvPackets:     metric.NewGauge(metaHostNetRecvPackets),
   365  		HostNetSendBytes:       metric.NewGauge(metaHostNetSendBytes),
   366  		HostNetSendPackets:     metric.NewGauge(metaHostNetSendPackets),
   367  		FDOpen:                 metric.NewGauge(metaFDOpen),
   368  		FDSoftLimit:            metric.NewGauge(metaFDSoftLimit),
   369  		Uptime:                 metric.NewGauge(metaUptime),
   370  		BuildTimestamp:         buildTimestamp,
   371  	}
   372  	rsr.last.disk = rsr.initialDiskCounters
   373  	rsr.last.net = rsr.initialNetCounters
   374  	return rsr
   375  }
   376  
   377  // GoMemStats groups a runtime.MemStats structure with the timestamp when it
   378  // was collected.
   379  type GoMemStats struct {
   380  	runtime.MemStats
   381  	// Collected is the timestamp at which these values were collected.
   382  	Collected time.Time
   383  }
   384  
   385  // SampleEnvironment queries the runtime system for various interesting metrics,
   386  // storing the resulting values in the set of metric gauges maintained by
   387  // RuntimeStatSampler. This makes runtime statistics more convenient for
   388  // consumption by the time series and status systems.
   389  //
   390  // This method should be called periodically by a higher level system in order
   391  // to keep runtime statistics current.
   392  //
   393  // SampleEnvironment takes GoMemStats as input because that is collected
   394  // separately, on a different schedule.
   395  func (rsr *RuntimeStatSampler) SampleEnvironment(ctx context.Context, ms GoMemStats) {
   396  	// Note that debug.ReadGCStats() does not suffer the same problem as
   397  	// runtime.ReadMemStats(). The only way you can know that is by reading the
   398  	// source.
   399  	gc := &debug.GCStats{}
   400  	debug.ReadGCStats(gc)
   401  
   402  	numCgoCall := runtime.NumCgoCall()
   403  	numGoroutine := runtime.NumGoroutine()
   404  
   405  	// Retrieve Mem and CPU statistics.
   406  	pid := os.Getpid()
   407  	mem := gosigar.ProcMem{}
   408  	if err := mem.Get(pid); err != nil {
   409  		log.Errorf(ctx, "unable to get mem usage: %v", err)
   410  	}
   411  	cpuTime := gosigar.ProcTime{}
   412  	if err := cpuTime.Get(pid); err != nil {
   413  		log.Errorf(ctx, "unable to get cpu usage: %v", err)
   414  	}
   415  
   416  	fds := gosigar.ProcFDUsage{}
   417  	if err := fds.Get(pid); err != nil {
   418  		if gosigar.IsNotImplemented(err) {
   419  			if !rsr.fdUsageNotImplemented {
   420  				rsr.fdUsageNotImplemented = true
   421  				log.Warningf(ctx, "unable to get file descriptor usage (will not try again): %s", err)
   422  			}
   423  		} else {
   424  			log.Errorf(ctx, "unable to get file descriptor usage: %s", err)
   425  		}
   426  	}
   427  
   428  	var deltaDisk diskStats
   429  	diskCounters, err := getSummedDiskCounters(ctx)
   430  	if err != nil {
   431  		log.Warningf(ctx, "problem fetching disk stats: %s; disk stats will be empty.", err)
   432  	} else {
   433  		deltaDisk = diskCounters
   434  		subtractDiskCounters(&deltaDisk, rsr.last.disk)
   435  		rsr.last.disk = diskCounters
   436  		subtractDiskCounters(&diskCounters, rsr.initialDiskCounters)
   437  
   438  		rsr.HostDiskReadBytes.Update(diskCounters.readBytes)
   439  		rsr.HostDiskReadCount.Update(diskCounters.readCount)
   440  		rsr.HostDiskReadTime.Update(int64(diskCounters.readTime))
   441  		rsr.HostDiskWriteBytes.Update(diskCounters.writeBytes)
   442  		rsr.HostDiskWriteCount.Update(diskCounters.writeCount)
   443  		rsr.HostDiskWriteTime.Update(int64(diskCounters.writeTime))
   444  		rsr.HostDiskIOTime.Update(int64(diskCounters.ioTime))
   445  		rsr.HostDiskWeightedIOTime.Update(int64(diskCounters.weightedIOTime))
   446  		rsr.IopsInProgress.Update(diskCounters.iopsInProgress)
   447  	}
   448  
   449  	var deltaNet net.IOCountersStat
   450  	netCounters, err := getSummedNetStats(ctx)
   451  	if err != nil {
   452  		log.Warningf(ctx, "problem fetching net stats: %s; net stats will be empty.", err)
   453  	} else {
   454  		deltaNet = netCounters
   455  		subtractNetworkCounters(&deltaNet, rsr.last.net)
   456  		rsr.last.net = netCounters
   457  		subtractNetworkCounters(&netCounters, rsr.initialNetCounters)
   458  
   459  		rsr.HostNetSendBytes.Update(int64(netCounters.BytesSent))
   460  		rsr.HostNetSendPackets.Update(int64(netCounters.PacketsSent))
   461  		rsr.HostNetRecvBytes.Update(int64(netCounters.BytesRecv))
   462  		rsr.HostNetRecvPackets.Update(int64(netCounters.PacketsRecv))
   463  	}
   464  
   465  	// Time statistics can be compared to the total elapsed time to create a
   466  	// useful percentage of total CPU usage, which would be somewhat less accurate
   467  	// if calculated later using downsampled time series data.
   468  	now := rsr.clock.PhysicalNow()
   469  	dur := float64(now - rsr.last.now)
   470  	// cpuTime.{User,Sys} are in milliseconds, convert to nanoseconds.
   471  	utime := int64(cpuTime.User) * 1e6
   472  	stime := int64(cpuTime.Sys) * 1e6
   473  	uPerc := float64(utime-rsr.last.utime) / dur
   474  	sPerc := float64(stime-rsr.last.stime) / dur
   475  	combinedNormalizedPerc := (sPerc + uPerc) / float64(runtime.NumCPU())
   476  	gcPausePercent := float64(uint64(gc.PauseTotal)-rsr.last.gcPauseTime) / dur
   477  	rsr.last.now = now
   478  	rsr.last.utime = utime
   479  	rsr.last.stime = stime
   480  	rsr.last.gcPauseTime = uint64(gc.PauseTotal)
   481  
   482  	var cgoAllocated, cgoTotal uint
   483  	if getCgoMemStats != nil {
   484  		var err error
   485  		cgoAllocated, cgoTotal, err = getCgoMemStats(ctx)
   486  		if err != nil {
   487  			log.Warningf(ctx, "problem fetching CGO memory stats: %s; CGO stats will be empty.", err)
   488  		}
   489  	}
   490  
   491  	// Log summary of statistics to console.
   492  	cgoRate := float64((numCgoCall-rsr.last.cgoCall)*int64(time.Second)) / dur
   493  	goMemStatsStale := timeutil.Now().Sub(ms.Collected) > time.Second
   494  	var staleMsg = ""
   495  	if goMemStatsStale {
   496  		staleMsg = "(stale)"
   497  	}
   498  	goTotal := ms.Sys - ms.HeapReleased
   499  	log.Infof(ctx, "%s", log.Safe(fmt.Sprintf("runtime stats: %s RSS, %d goroutines, %s/%s/%s GO alloc/idle/total%s, "+
   500  		"%s/%s CGO alloc/total, %.1f CGO/sec, %.1f/%.1f %%(u/s)time, %.1f %%gc (%dx), "+
   501  		"%s/%s (r/w)net",
   502  		humanize.IBytes(mem.Resident), numGoroutine,
   503  		humanize.IBytes(ms.HeapAlloc), humanize.IBytes(ms.HeapIdle), humanize.IBytes(goTotal),
   504  		staleMsg,
   505  		humanize.IBytes(uint64(cgoAllocated)), humanize.IBytes(uint64(cgoTotal)),
   506  		cgoRate, 100*uPerc, 100*sPerc, 100*gcPausePercent, gc.NumGC-rsr.last.gcCount,
   507  		humanize.IBytes(deltaNet.BytesRecv), humanize.IBytes(deltaNet.BytesSent),
   508  	)))
   509  	rsr.last.cgoCall = numCgoCall
   510  	rsr.last.gcCount = gc.NumGC
   511  
   512  	rsr.GoAllocBytes.Update(int64(ms.HeapAlloc))
   513  	rsr.GoTotalBytes.Update(int64(goTotal))
   514  	rsr.CgoCalls.Update(numCgoCall)
   515  	rsr.Goroutines.Update(int64(numGoroutine))
   516  	rsr.CgoAllocBytes.Update(int64(cgoAllocated))
   517  	rsr.CgoTotalBytes.Update(int64(cgoTotal))
   518  	rsr.GcCount.Update(gc.NumGC)
   519  	rsr.GcPauseNS.Update(int64(gc.PauseTotal))
   520  	rsr.GcPausePercent.Update(gcPausePercent)
   521  	rsr.CPUUserNS.Update(utime)
   522  	rsr.CPUUserPercent.Update(uPerc)
   523  	rsr.CPUSysNS.Update(stime)
   524  	rsr.CPUSysPercent.Update(sPerc)
   525  	rsr.CPUCombinedPercentNorm.Update(combinedNormalizedPerc)
   526  	rsr.FDOpen.Update(int64(fds.Open))
   527  	rsr.FDSoftLimit.Update(int64(fds.SoftLimit))
   528  	rsr.RSSBytes.Update(int64(mem.Resident))
   529  	rsr.Uptime.Update((now - rsr.startTimeNanos) / 1e9)
   530  }
   531  
   532  // GetCPUCombinedPercentNorm is part of the rowexec.RuntimeStats interface.
   533  func (rsr *RuntimeStatSampler) GetCPUCombinedPercentNorm() float64 {
   534  	return rsr.CPUCombinedPercentNorm.Value()
   535  }
   536  
   537  // diskStats contains the disk statistics returned by the operating
   538  // system. Interpretation of some of these stats varies by platform,
   539  // although as much as possible they are normalized to the semantics
   540  // used by linux's diskstats interface.
   541  //
   542  // Except for iopsInProgress, these metrics act like counters (always
   543  // increasing, and best interpreted as a rate).
   544  type diskStats struct {
   545  	readBytes int64
   546  	readCount int64
   547  
   548  	// readTime (and writeTime) may increase more than 1s per second if
   549  	// access to storage is parallelized.
   550  	readTime time.Duration
   551  
   552  	writeBytes int64
   553  	writeCount int64
   554  	writeTime  time.Duration
   555  
   556  	// ioTime is the amount of time that iopsInProgress is non-zero (so
   557  	// its increase is capped at 1s/s). Only available on linux.
   558  	ioTime time.Duration
   559  
   560  	// weightedIOTime is a linux-specific metric that attempts to
   561  	// represent "an easy measure of both I/O completion time and the
   562  	// backlog that may be accumulating."
   563  	weightedIOTime time.Duration
   564  
   565  	// iopsInProgress is a gauge of the number of pending IO operations.
   566  	// Not available on macOS.
   567  	iopsInProgress int64
   568  }
   569  
   570  func getSummedDiskCounters(ctx context.Context) (diskStats, error) {
   571  	diskCounters, err := getDiskCounters(ctx)
   572  	if err != nil {
   573  		return diskStats{}, err
   574  	}
   575  
   576  	return sumDiskCounters(diskCounters), nil
   577  }
   578  
   579  func getSummedNetStats(ctx context.Context) (net.IOCountersStat, error) {
   580  	netCounters, err := net.IOCountersWithContext(ctx, true /* per NIC */)
   581  	if err != nil {
   582  		return net.IOCountersStat{}, err
   583  	}
   584  
   585  	return sumNetworkCounters(netCounters), nil
   586  }
   587  
   588  // sumDiskCounters returns a new disk.IOCountersStat whose values are the sum of the
   589  // values in the slice of disk.IOCountersStats passed in.
   590  func sumDiskCounters(disksStats []diskStats) diskStats {
   591  	output := diskStats{}
   592  	for _, stats := range disksStats {
   593  		output.readBytes += stats.readBytes
   594  		output.readCount += stats.readCount
   595  		output.readTime += stats.readTime
   596  
   597  		output.writeBytes += stats.writeBytes
   598  		output.writeCount += stats.writeCount
   599  		output.writeTime += stats.writeTime
   600  
   601  		output.ioTime += stats.ioTime
   602  		output.weightedIOTime += stats.weightedIOTime
   603  
   604  		output.iopsInProgress += stats.iopsInProgress
   605  	}
   606  	return output
   607  }
   608  
   609  // subtractDiskCounters subtracts the counters in `sub` from the counters in `from`,
   610  // saving the results in `from`.
   611  func subtractDiskCounters(from *diskStats, sub diskStats) {
   612  	from.writeCount -= sub.writeCount
   613  	from.writeBytes -= sub.writeBytes
   614  	from.writeTime -= sub.writeTime
   615  
   616  	from.readCount -= sub.readCount
   617  	from.readBytes -= sub.readBytes
   618  	from.readTime -= sub.readTime
   619  
   620  	from.ioTime -= sub.ioTime
   621  	from.weightedIOTime -= sub.weightedIOTime
   622  }
   623  
   624  // sumNetworkCounters returns a new net.IOCountersStat whose values are the sum of the
   625  // values in the slice of net.IOCountersStats passed in.
   626  func sumNetworkCounters(netCounters []net.IOCountersStat) net.IOCountersStat {
   627  	output := net.IOCountersStat{}
   628  	for _, counter := range netCounters {
   629  		output.BytesRecv += counter.BytesRecv
   630  		output.BytesSent += counter.BytesSent
   631  		output.PacketsRecv += counter.PacketsRecv
   632  		output.PacketsSent += counter.PacketsSent
   633  	}
   634  	return output
   635  }
   636  
   637  // subtractNetworkCounters subtracts the counters in `sub` from the counters in `from`,
   638  // saving the results in `from`.
   639  func subtractNetworkCounters(from *net.IOCountersStat, sub net.IOCountersStat) {
   640  	from.BytesRecv -= sub.BytesRecv
   641  	from.BytesSent -= sub.BytesSent
   642  	from.PacketsRecv -= sub.PacketsRecv
   643  	from.PacketsSent -= sub.PacketsSent
   644  }