github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/stats/target_stats.go

// Package stats provides methods and functionality to register, track, log,
// and StatsD-notify statistics that, for the most part, include "counter" and "latency" kinds.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package stats

import (
	"fmt"
	"strconv"
	"strings"
	"time"
	"unsafe"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/fs"
	"github.com/NVIDIA/aistore/ios"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/sys"
	"github.com/NVIDIA/aistore/transport"
)

// Naming Convention:
//
//	-> "*.n" - counter
//	-> "*.ns" - latency (nanoseconds)
//	-> "*.size" - size (bytes)
//	-> "*.bps" - throughput (in byte/s)
//	-> "*.id" - ID
const (
	// KindCounter & KindSize - always incremented

	// NOTE semantics:
	// - counts all instances when remote GET is followed by storing of the new object (version) locally
	// - does _not_ count assorted calls to `GetObjReader` (e.g., via tcb/tco -> LDP.Reader)
	GetColdCount = "get.cold.n"
	GetColdSize  = "get.cold.size"

	LruEvictCount = "lru.evict.n"
	LruEvictSize  = "lru.evict.size"

	CleanupStoreCount = "cleanup.store.n"
	CleanupStoreSize  = "cleanup.store.size"

	VerChangeCount = "ver.change.n"
	VerChangeSize  = "ver.change.size"

	// intra-cluster transmit & receive
	StreamsOutObjCount = transport.OutObjCount
	StreamsOutObjSize  = transport.OutObjSize
	StreamsInObjCount  = transport.InObjCount
	StreamsInObjSize   = transport.InObjSize

	// errors
	ErrCksumCount    = "err.cksum.n"
	ErrCksumSize     = "err.cksum.size"
	ErrMetadataCount = "err.md.n"
	ErrIOCount       = "err.io.n"

	// target restarted (effectively, boolean)
	RestartCount = "restart.n"

	// KindLatency
	PutLatency      = "put.ns"
	AppendLatency   = "append.ns"
	GetRedirLatency = "get.redir.ns"
	PutRedirLatency = "put.redir.ns"
	DownloadLatency = "dl.ns"

	// (read remote, write local) latency (and note that cold-GET's "pure"
	// transmit-response latency = GetLatency - GetColdRwLatency)
	GetColdRwLatency = "get.cold.rw.ns"

	// Dsort
	DsortCreationReqCount    = "dsort.creation.req.n"
	DsortCreationRespCount   = "dsort.creation.resp.n"
	DsortCreationRespLatency = "dsort.creation.resp.ns"
	DsortExtractShardDskCnt  = "dsort.extract.shard.dsk.n"
	DsortExtractShardMemCnt  = "dsort.extract.shard.mem.n"
	DsortExtractShardSize    = "dsort.extract.shard.size" // uncompressed

	// Downloader
	DownloadSize = "dl.size"

	// KindThroughput
	GetThroughput = "get.bps" // bytes per second
	PutThroughput = "put.bps" // ditto

	// same as above via `.cumulative`
	GetSize = "get.size"
	PutSize = "put.size"

	// core
	RemoteDeletedDelCount = core.RemoteDeletedDelCount // compare w/ common `DeleteCount`

	LcacheCollisionCount = core.LcacheCollisionCount
	LcacheEvictedCount   = core.LcacheEvictedCount
	LcacheFlushColdCount = core.LcacheFlushColdCount

	// variable label used for prometheus disk metrics
	diskMetricLabel = "disk"
)
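
// A brief, hypothetical example of how the metrics above get updated at runtime.
// It assumes the runner's generic updaters (Inc, Add, AddMany), which are defined
// elsewhere in this package; `tstats` and `size` are placeholder names:
//
//	tstats.Inc(LruEvictCount)
//	tstats.Add(LruEvictSize, size)
//	tstats.AddMany(
//		cos.NamedVal64{Name: GetColdCount, Value: 1},
//		cos.NamedVal64{Name: GetColdSize, Value: size},
//	)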

type (
	Trunner struct {
		t         core.NodeMemCap
		TargetCDF fs.TargetCDF `json:"cdf"`
		disk      ios.AllDiskStats
		xln       string
		runner    // the base (compare w/ Prunner)
		lines     []string
		fsIDs     []cos.FsID
		mem       sys.MemStat
		xallRun   core.AllRunningInOut
		standby   bool
	}
)

const (
	minLogDiskUtil = 10 // skip logging idle disks

	numTargetStats = 48 // approx. initial
)

/////////////
// Trunner //
/////////////

// interface guard
var _ cos.Runner = (*Trunner)(nil)

func NewTrunner(t core.NodeMemCap) *Trunner { return &Trunner{t: t} }

func (r *Trunner) Run() error     { return r._run(r /*as statsLogger*/) }
func (r *Trunner) Standby(v bool) { r.standby = v }

func (r *Trunner) Init(t core.Target) *atomic.Bool {
	r.core = &coreStats{}

	r.core.init(numTargetStats)

	r.regCommon(t.Snode())

	r.ctracker = make(copyTracker, numTargetStats) // these two are allocated once and only used in serial context
	r.lines = make([]string, 0, 16)
	r.disk = make(ios.AllDiskStats, 16)

	config := cmn.GCO.Get()
	r.core.statsTime = config.Periodic.StatsTime.D()

	r.runner.name = "targetstats"
	r.runner.daemon = t

	r.runner.stopCh = make(chan struct{}, 4)

	r.core.initMetricClient(t.Snode(), &r.runner)

	r.sorted = make([]string, 0, numTargetStats)

	r.xallRun.Running = make([]string, 16)
	r.xallRun.Idle = make([]string, 16)

	return &r.runner.startedUp
}
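
// Typical wiring, as an illustrative sketch only (the actual startup sequence
// lives in the target's initialization code; `t` here is a placeholder for the
// node implementing core.Target / core.NodeMemCap):
//
//	tstats := NewTrunner(t)      // construct the runner
//	_ = tstats.Init(t)           // allocate trackers, init the metrics client
//	_ = tstats.InitCDF()         // initial capacity (CDF) refresh
//	tstats.RegMetrics(t.Snode()) // register target-specific metrics (below)
//	go tstats.Run()              // periodic stats collection and logging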

func (r *Trunner) InitCDF() error {
	fs.InitCDF(&r.TargetCDF)
	_, err, errCap := fs.CapRefresh(nil, &r.TargetCDF)
	if err != nil {
		return err
	}
	if errCap != nil {
		nlog.Errorln(r.t.String()+":", errCap)
	}
	return nil
}

func diskMetricName(disk, metric string) string {
	return fmt.Sprintf("%s.%s.%s", diskMetricLabel, disk, metric)
}
func nameRbps(disk string) string { return diskMetricName(disk, "read.bps") }
func nameRavg(disk string) string { return diskMetricName(disk, "avg.rsize") }
func nameWbps(disk string) string { return diskMetricName(disk, "write.bps") }
func nameWavg(disk string) string { return diskMetricName(disk, "avg.wsize") }
func nameUtil(disk string) string { return diskMetricName(disk, "util") }
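
// e.g., diskMetricName("sda", "util") => "disk.sda.util" (with "sda" purely
// illustrative); the helpers above follow the same pattern for read/write
// throughput and average request sizes.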

// log vs idle logic
func isDiskMetric(name string) bool {
	return strings.HasPrefix(name, "disk.")
}
func isDiskUtilMetric(name string) bool {
	return isDiskMetric(name) && strings.HasSuffix(name, ".util")
}

// extractPromDiskMetricName returns a Prometheus-friendly metric name
// from a disk tracker name of the format `disk.<disk-name>.<metric-name>`.
// It returns two strings:
//  1. <disk-name>, used as the Prometheus variable label
//  2. `disk.<metric-name>`, used as the Prometheus metric name
func extractPromDiskMetricName(name string) (diskName, metricName string) {
	diskName = strings.Split(name, ".")[1]
	return diskName, strings.ReplaceAll(name, "."+diskName+".", ".")
}
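
// e.g. (illustrative): extractPromDiskMetricName("disk.sda.read.bps") returns
// ("sda", "disk.read.bps").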

// target-specific metrics, in addition to the common ones already added via regCommon()
func (r *Trunner) RegMetrics(node *meta.Snode) {
	r.reg(node, GetColdCount, KindCounter)
	r.reg(node, GetColdSize, KindSize)

	r.reg(node, LruEvictCount, KindCounter)
	r.reg(node, LruEvictSize, KindSize)

	r.reg(node, CleanupStoreCount, KindCounter)
	r.reg(node, CleanupStoreSize, KindSize)

	r.reg(node, VerChangeCount, KindCounter)
	r.reg(node, VerChangeSize, KindSize)

	r.reg(node, PutLatency, KindLatency)
	r.reg(node, AppendLatency, KindLatency)
	r.reg(node, GetRedirLatency, KindLatency)
	r.reg(node, PutRedirLatency, KindLatency)
	r.reg(node, GetColdRwLatency, KindLatency)

	// bps
	r.reg(node, GetThroughput, KindThroughput)
	r.reg(node, PutThroughput, KindThroughput)

	r.reg(node, GetSize, KindSize)
	r.reg(node, PutSize, KindSize)

	// errors
	r.reg(node, ErrCksumCount, KindCounter)
	r.reg(node, ErrCksumSize, KindSize)

	r.reg(node, ErrMetadataCount, KindCounter)
	r.reg(node, ErrIOCount, KindCounter)

	// streams
	r.reg(node, StreamsOutObjCount, KindCounter)
	r.reg(node, StreamsOutObjSize, KindSize)
	r.reg(node, StreamsInObjCount, KindCounter)
	r.reg(node, StreamsInObjSize, KindSize)

	// node restarted
	r.reg(node, RestartCount, KindCounter)

	// download
	r.reg(node, DownloadSize, KindSize)
	r.reg(node, DownloadLatency, KindLatency)

	// dsort
	r.reg(node, DsortCreationReqCount, KindCounter)
	r.reg(node, DsortCreationRespCount, KindCounter)
	r.reg(node, DsortCreationRespLatency, KindLatency)
	r.reg(node, DsortExtractShardDskCnt, KindCounter)
	r.reg(node, DsortExtractShardMemCnt, KindCounter)
	r.reg(node, DsortExtractShardSize, KindSize)

	// core
	r.reg(node, RemoteDeletedDelCount, KindCounter)
	r.reg(node, LcacheCollisionCount, KindCounter)
	r.reg(node, LcacheEvictedCount, KindCounter)
	r.reg(node, LcacheFlushColdCount, KindCounter)

	// Prometheus
	r.core.initProm(node)
}

func (r *Trunner) RegDiskMetrics(node *meta.Snode, disk string) {
	s, n := r.core.Tracker, nameRbps(disk)
	if _, ok := s[n]; ok { // must be config.TestingEnv()
		return
	}
	r.reg(node, n, KindComputedThroughput)
	r.reg(node, nameWbps(disk), KindComputedThroughput)

	r.reg(node, nameRavg(disk), KindGauge)
	r.reg(node, nameWavg(disk), KindGauge)
	r.reg(node, nameUtil(disk), KindGauge)
}
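
// For a given disk, say "sda" (illustrative), RegDiskMetrics registers:
//   - disk.sda.read.bps, disk.sda.write.bps   (computed throughput)
//   - disk.sda.avg.rsize, disk.sda.avg.wsize  (gauges: average read/write sizes)
//   - disk.sda.util                           (gauge: disk utilization)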

func (r *Trunner) GetStats() (ds *Node) {
	ds = r.runner.GetStats()

	fs.InitCDF(&ds.TargetCDF)
	fs.CapRefresh(nil, &ds.TargetCDF)
	return ds
}

// [backward compatibility] v3.22 and prior
func (r *Trunner) GetStatsV322() (out *NodeV322) {
	ds := r.GetStats()

	out = &NodeV322{}
	out.Snode = ds.Snode
	out.Tracker = ds.Tracker
	out.TargetCDF.PctMax = ds.TargetCDF.PctMax
	out.TargetCDF.PctAvg = ds.TargetCDF.PctAvg
	out.TargetCDF.PctMin = ds.TargetCDF.PctMin
	out.TargetCDF.CsErr = ds.TargetCDF.CsErr
	out.TargetCDF.Mountpaths = make(map[string]*fs.CDFv322, len(ds.TargetCDF.Mountpaths))
	for mpath := range ds.TargetCDF.Mountpaths {
		cdf := &fs.CDFv322{
			Capacity: ds.TargetCDF.Mountpaths[mpath].Capacity,
			Disks:    ds.TargetCDF.Mountpaths[mpath].Disks,
			FS:       ds.TargetCDF.Mountpaths[mpath].FS.String(),
		}
		out.TargetCDF.Mountpaths[mpath] = cdf
	}
	return out
}

func (r *Trunner) log(now int64, uptime time.Duration, config *cmn.Config) {
	r.lines = r.lines[:0]

	// 1. collect disk stats and populate the tracker
	s := r.core
	fs.FillDiskStats(r.disk)
	for disk, stats := range r.disk {
		v := s.Tracker[nameRbps(disk)]
		v.Value = stats.RBps
		v = s.Tracker[nameRavg(disk)]
		v.Value = stats.Ravg
		v = s.Tracker[nameWbps(disk)]
		v.Value = stats.WBps
		v = s.Tracker[nameWavg(disk)]
		v.Value = stats.Wavg
		v = s.Tracker[nameUtil(disk)]
		v.Value = stats.Util
	}

	// 2. copy stats, reset latencies, send via StatsD if configured
	s.updateUptime(uptime)
	s.promLock()
	idle := s.copyT(r.ctracker, config.Disk.DiskUtilLowWM)
	s.promUnlock()

	if now >= r.next || !idle {
		s.sgl.Reset() // sharing w/ coreStats.copyT
		r.ctracker.write(s.sgl, r.sorted, true /*target*/, idle)
		if s.sgl.Len() > 3 { // skip '{}'
			line := string(s.sgl.Bytes())
			if line != r.prev {
				r.lines = append(r.lines, line)
				r.prev = line
			}
		}
	}

	// 3. capacity
	var (
		cs, updated, err, errCap = fs.CapPeriodic(now, config, &r.TargetCDF)
	)
	if err != nil {
		nlog.Errorln(err)
	} else if errCap != nil {
		r.t.OOS(&cs)
	} else if updated && cs.PctMax > int32(config.Space.CleanupWM) {
		debug.Assert(!cs.IsOOS(), cs.String())
		errCap = cmn.NewErrCapExceeded(cs.TotalUsed, cs.TotalAvail+cs.TotalUsed, 0, config.Space.CleanupWM, cs.PctMax, false)
		r.t.OOS(&cs)
	}
	if (updated && now >= r.next) || errCap != nil || now >= r.lastCap+maxCapLogInterval {
		fast := fs.NoneShared(len(r.TargetCDF.Mountpaths))
		unique := fast
		if !fast {
			r.fsIDs = r.fsIDs[:0]
		}
		for mpath, cdf := range r.TargetCDF.Mountpaths {
			if !fast {
				r.fsIDs, unique = cos.AddUniqueFsID(r.fsIDs, cdf.FS.FsID)
			}
			if unique {
				s := mpath + cdf.Label.ToLog() + ": used " + strconv.Itoa(int(cdf.Capacity.PctUsed)) + "%"
				s += ", avail " + cos.ToSizeIEC(int64(cdf.Capacity.Avail), 2)
				r.lines = append(r.lines, s)
			}
		}
		r.lastCap = now
	}

	// 4. append disk stats to log subject to (idle) filtering (see related: `ignoreIdle`)
	r.logDiskStats(now)

	// 5. jobs
	verbose := cmn.Rom.FastV(4, cos.SmoduleStats)
	if !idle {
		var ln string
		r.xallRun.Running = r.xallRun.Running[:0]
		r.xallRun.Idle = r.xallRun.Idle[:0]
		orig := r.xallRun.Idle
		if !verbose {
			r.xallRun.Idle = nil
		}
		r.t.GetAllRunning(&r.xallRun, true /*periodic*/)
		if !verbose {
			r.xallRun.Idle = orig
		}
		if len(r.xallRun.Running) > 0 {
			ln = "running: " + strings.Join(r.xallRun.Running, " ")
			if len(r.xallRun.Idle) > 0 {
				ln += ";  idle: " + strings.Join(r.xallRun.Idle, " ")
			}
		} else if len(r.xallRun.Idle) > 0 {
			ln += "idle: " + strings.Join(r.xallRun.Idle, " ")
		}
		if ln != "" && ln != r.xln {
			r.lines = append(r.lines, ln)
		}
		r.xln = ln
	}

	// 6. log
	for _, ln := range r.lines {
		nlog.Infoln(ln)
	}

	// 7. separately, memory
	_ = r.mem.Get()
	var (
		mm       = r.t.PageMM()
		pressure = mm.Pressure(&r.mem)
	)
	switch {
	case pressure >= memsys.PressureExtreme:
		nlog.Errorln(mm.Str(&r.mem))
	case pressure >= memsys.PressureHigh:
		nlog.Warningln(mm.Str(&r.mem))
	}

	if now > r.next {
		r.next = now + maxStatsLogInterval
	}
}

// log formatted disk stats:
// [ disk: read throughput, average read size, write throughput, average write size, disk utilization ]
// e.g.: [ sda: 94MiB/s, 68KiB, 25MiB/s, 21KiB, 82% ]
func (r *Trunner) logDiskStats(now int64) {
	for disk, stats := range r.disk {
		if stats.Util < minLogDiskUtil/2 || (stats.Util < minLogDiskUtil && now < r.next) {
			continue
		}

		rbps := cos.ToSizeIEC(stats.RBps, 0)
		wbps := cos.ToSizeIEC(stats.WBps, 0)
		ravg := cos.ToSizeIEC(stats.Ravg, 0)
		wavg := cos.ToSizeIEC(stats.Wavg, 0)
		l := len(disk) + len(rbps) + len(wbps) + len(ravg) + len(wavg) + 64
		buf := make([]byte, 0, l)
		buf = append(buf, disk...)
		buf = append(buf, ": "...)
		buf = append(buf, rbps...)
		buf = append(buf, "/s, "...)
		buf = append(buf, ravg...)
		buf = append(buf, ", "...)
		buf = append(buf, wbps...)
		buf = append(buf, "/s, "...)
		buf = append(buf, wavg...)
		buf = append(buf, ", "...)
		buf = append(buf, strconv.FormatInt(stats.Util, 10)...)
		buf = append(buf, "%"...)
		r.lines = append(r.lines, *(*string)(unsafe.Pointer(&buf)))
	}
}

func (r *Trunner) statsTime(newval time.Duration) {
	r.core.statsTime = newval
}

func (r *Trunner) standingBy() bool { return r.standby }