github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/stats/common_stats.go

// Package stats provides methods and functionality to register, track, log,
// and StatsD-notify statistics that, for the most part, include "counter" and "latency" kinds.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package stats

import (
	"encoding/json"
	"fmt"
	rfs "io/fs"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	ratomic "sync/atomic"
	"time"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/mono"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/hk"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/stats/statsd"
	jsoniter "github.com/json-iterator/go"
	"github.com/prometheus/client_golang/prometheus"
)
const (
	dfltPeriodicFlushTime = time.Minute            // when `config.Log.FlushTime` is 0 (zero)
	dfltPeriodicTimeStamp = time.Hour              // extended date/time complementary to log timestamps (e.g., "11:29:11.644596")
	maxStatsLogInterval   = int64(3 * time.Minute) // when idle; also an upper limit on `config.Log.StatsTime`
	maxCapLogInterval     = int64(4 * time.Hour)   // to see capacity at least a few times a day (when idle)
)

// more periodic
const (
	maxLogSizeCheckTime = 48 * time.Minute       // periodically check the logs for max accumulated size
	startupSleep        = 300 * time.Millisecond // periodically poll ClusterStarted()
	numGorHighCheckTime = 2 * time.Minute        // periodically log a warning if the number of goroutines remains high
)

// number-of-goroutines watermarks expressed as multipliers over the number of available logical CPUs (GOMAXPROCS)
const (
	numGorHigh    = 100
	numGorExtreme = 1000
)

// metrics
const (
	// KindCounter:
	// all basic counters are accompanied by the corresponding (errPrefix + name) error count:
	// e.g.: "get.n" => "err.get.n", "put.n" => "err.put.n", etc.
	// See also: `IncErr`, `regCommon`
	GetCount    = "get.n"    // GET(object) count = (cold + warm)
	PutCount    = "put.n"    // ditto PUT
	AppendCount = "append.n" // ditto etc.
	DeleteCount = "del.n"    // ditto
	RenameCount = "ren.n"    // ditto
	ListCount   = "lst.n"    // list-objects

	// statically defined err counts (NOTE: update regCommon when adding/updating)
	ErrHTTPWriteCount = errPrefix + "http.write.n"
	ErrDownloadCount  = errPrefix + "dl.n"
	ErrPutMirrorCount = errPrefix + "put.mirror.n"

	// KindLatency
	GetLatency       = "get.ns"
	ListLatency      = "lst.ns"
	KeepAliveLatency = "kalive.ns"

	// KindSpecial
	Uptime = "up.ns.time"
)
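
// Usage sketch (illustrative): a runner increments a basic counter and, on
// failure, its "err."-prefixed counterpart:
//
//	r.Inc(GetCount)    // "get.n"
//	r.IncErr(GetCount) // IncErr prepends errPrefix => "err.get.n"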

// interfaces
type (
	metric = statsd.Metric // type alias

	// implemented by the stats runners
	statsLogger interface {
		log(now int64, uptime time.Duration, config *cmn.Config)
		statsTime(newval time.Duration)
		standingBy() bool
	}
	runnerHost interface {
		ClusterStarted() bool
	}
)

// primitives: values and maps
type (
	// Stats are tracked via a map of stats names (key) to statsValue (values).
	// There are two main types of stats, counter and latency, declared
	// using the kind field. Only latency stats have numSamples used to compute latency.
	statsValue struct {
		kind  string // enum { KindCounter, ..., KindSpecial }
		label struct {
			comm string // common part of the metric label (as in: <prefix> . comm . <suffix>)
			stsd string // StatsD label
			prom string // Prometheus label
		}
		Value      int64 `json:"v,string"`
		numSamples int64 // (log + StatsD) only
		cumulative int64
	}
	copyValue struct {
		Value int64 `json:"v,string"`
	}
	copyTracker map[string]copyValue // aggregated every statsTime interval
	promDesc    map[string]*prometheus.Desc
)

// main types
type (
	coreStats struct {
		Tracker   map[string]*statsValue
		promDesc  promDesc
		statsdC   *statsd.Client
		sgl       *memsys.SGL
		statsTime time.Duration
		cmu       sync.RWMutex // ctracker vs Prometheus Collect()
	}

	// Prunner and Trunner
	runner struct {
		daemon    runnerHost
		stopCh    chan struct{}
		ticker    *time.Ticker
		core      *coreStats
		ctracker  copyTracker // allocated once, to avoid allocating at runtime
		sorted    []string    // sorted names
		name      string      // this stats-runner's name
		prev      string      // prev ctracker.write
		next      int64       // mono.NanoTime()
		lastCap   int64       // ditto
		startedUp atomic.Bool
	}
)

// interface guard
var (
	_ Tracker = (*Prunner)(nil)
	_ Tracker = (*Trunner)(nil)
)

// sample name ais.ip-10-0-2-19.root.log.INFO.20180404-031540.2249
var logtypes = []string{".INFO.", ".WARNING.", ".ERROR."}

var ignoreIdle = []string{"kalive", Uptime, "disk."}

func ignore(s string) bool {
	for _, p := range ignoreIdle {
		if strings.HasPrefix(s, p) {
			return true
		}
	}
	return false
}

///////////////
// coreStats //
///////////////

// interface guard
var (
	_ json.Marshaler   = (*coreStats)(nil)
	_ json.Unmarshaler = (*coreStats)(nil)
)

// convert bytes to megabytes with a fixed rounding precision = 2 digits
// - KindThroughput and KindComputedThroughput only
// - MB, not MiB
// - math.Ceil wouldn't produce two decimals
func roundMBs(val int64) (mbs float64) {
	mbs = float64(val) / 1000 / 10
	num := int(mbs + 0.5)
	mbs = float64(num) / 100
	return
}
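
// Worked example (for illustration): roundMBs(123_456_789) computes
// 123456789/1000/10 = 12345.6789, rounds via int(12345.6789 + 0.5) = 12346,
// and returns 12346/100 = 123.46, i.e. ~123.46 MB (decimal megabytes).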

func (s *coreStats) init(size int) {
	s.Tracker = make(map[string]*statsValue, size)
	s.promDesc = make(promDesc, size)

	s.sgl = memsys.PageMM().NewSGL(memsys.PageSize)
}

// NOTE: nil StatsD client means that we provide metrics to Prometheus (see below)
func (s *coreStats) isPrometheus() bool { return s.statsdC == nil }

// vs Collect()
func (s *coreStats) promRLock() {
	if s.isPrometheus() {
		s.cmu.RLock()
	}
}

func (s *coreStats) promRUnlock() {
	if s.isPrometheus() {
		s.cmu.RUnlock()
	}
}

func (s *coreStats) promLock() {
	if s.isPrometheus() {
		s.cmu.Lock()
	}
}

func (s *coreStats) promUnlock() {
	if s.isPrometheus() {
		s.cmu.Unlock()
	}
}

// init MetricClient: StatsD (default) or Prometheus
func (s *coreStats) initMetricClient(node *meta.Snode, parent *runner) {
	// Either Prometheus
	if prom := os.Getenv("AIS_PROMETHEUS"); prom != "" {
		nlog.Infoln("Using Prometheus")
		prometheus.MustRegister(parent) // as prometheus.Collector
		return
	}

	// or StatsD
	var (
		port  = 8125  // StatsD default port, see https://github.com/etsy/statsd
		probe = false // test-probe StatsD server at init time
	)
	if portStr := os.Getenv("AIS_STATSD_PORT"); portStr != "" {
		if portNum, err := cmn.ParsePort(portStr); err != nil {
			debug.AssertNoErr(err)
			nlog.Errorln(err)
		} else {
			port = portNum
		}
	}
	if probeStr := os.Getenv("AIS_STATSD_PROBE"); probeStr != "" {
		if probeBool, err := cos.ParseBool(probeStr); err != nil {
			nlog.Errorln(err)
		} else {
			probe = probeBool
		}
	}
	id := strings.ReplaceAll(node.ID(), ":", "_") // ":" delineates name and value for StatsD
	statsD, err := statsd.New("localhost", port, "ais"+node.Type()+"."+id, probe)
	if err != nil {
		nlog.Errorf("Starting up without StatsD: %v", err)
	} else {
		nlog.Infoln("Using StatsD")
	}
	s.statsdC = statsD
}
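
// Configuration sketch (deployment environment assumed, values illustrative):
//
//	AIS_PROMETHEUS=true    # register this runner as a prometheus.Collector
//	AIS_STATSD_PORT=8125   # override the default StatsD port
//	AIS_STATSD_PROBE=true  # test-probe the StatsD server at init time
//
// Any non-empty AIS_PROMETHEUS takes precedence; otherwise, StatsD is used.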

// populate *prometheus.Desc and statsValue.label.prom
// NOTE: naming; compare with statsTracker.register()
func (s *coreStats) initProm(node *meta.Snode) {
	if !s.isPrometheus() {
		return
	}
	id := strings.ReplaceAll(node.ID(), ".", "_")
	for name, v := range s.Tracker {
		var variableLabels []string
		if isDiskMetric(name) {
			// obtain prometheus specific disk-metric name from tracker name
			// e.g. `disk.nvme0.read.bps` -> `disk.read.bps`.
			_, name = extractPromDiskMetricName(name)
			variableLabels = []string{diskMetricLabel}
		}
		label := strings.ReplaceAll(name, ".", "_")
		// prometheus metrics names shouldn't include daemonID.
		label = strings.ReplaceAll(label, "_"+id+"_", "_")
		v.label.prom = strings.ReplaceAll(label, ":", "_")

		help := v.kind
		if strings.HasSuffix(v.label.prom, "_n") {
			help = "total number of operations"
		} else if strings.HasSuffix(v.label.prom, "_size") {
			help = "total size (MB)"
		} else if strings.HasSuffix(v.label.prom, "avg_rsize") {
			help = "average read size (bytes)"
		} else if strings.HasSuffix(v.label.prom, "avg_wsize") {
			help = "average write size (bytes)"
		} else if strings.HasSuffix(v.label.prom, "_ns") {
			v.label.prom = strings.TrimSuffix(v.label.prom, "_ns") + "_ms"
			help = "latency (milliseconds)"
		} else if strings.Contains(v.label.prom, "_ns_") {
			v.label.prom = strings.ReplaceAll(v.label.prom, "_ns_", "_ms_")
			if name == Uptime {
				v.label.prom = strings.ReplaceAll(v.label.prom, "_ms_", "")
				help = "uptime (seconds)"
			} else {
				help = "latency (milliseconds)"
			}
		} else if strings.HasSuffix(v.label.prom, "_bps") {
			v.label.prom = strings.TrimSuffix(v.label.prom, "_bps") + "_mbps"
			help = "throughput (MB/s)"
		}

		fullqn := prometheus.BuildFQName("ais", node.Type(), v.label.prom)
		// e.g. metric: ais_target_disk_avg_wsize{disk="nvme0n1",node_id="fqWt8081"}
		s.promDesc[name] = prometheus.NewDesc(fullqn, help, variableLabels, prometheus.Labels{"node_id": id})
	}
}
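
// Naming sketch (derived from the code above; node ID illustrative): for a
// target with ID "fqWt8081", the tracked "get.ns" (KindLatency) becomes the
// Prometheus metric "ais_target_get_ms" with help "latency (milliseconds)"
// and the constant label {node_id="fqWt8081"}.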

func (s *coreStats) updateUptime(d time.Duration) {
	v := s.Tracker[Uptime]
	ratomic.StoreInt64(&v.Value, d.Nanoseconds())
}

func (s *coreStats) MarshalJSON() ([]byte, error) { return jsoniter.Marshal(s.Tracker) }
func (s *coreStats) UnmarshalJSON(b []byte) error { return jsoniter.Unmarshal(b, &s.Tracker) }

func (s *coreStats) get(name string) (val int64) {
	v := s.Tracker[name]
	val = ratomic.LoadInt64(&v.Value)
	return
}

func (s *coreStats) update(nv cos.NamedVal64) {
	v, ok := s.Tracker[nv.Name]
	debug.Assertf(ok, "invalid metric name %q", nv.Name)
	switch v.kind {
	case KindLatency:
		ratomic.AddInt64(&v.numSamples, 1)
		fallthrough
	case KindThroughput:
		ratomic.AddInt64(&v.Value, nv.Value)
		ratomic.AddInt64(&v.cumulative, nv.Value)
	case KindCounter, KindSize:
		ratomic.AddInt64(&v.Value, nv.Value)
		// - non-empty suffix forces an immediate Tx with no aggregation (see below);
		// - suffix is an arbitrary string that can be defined at runtime;
		// - e.g. usage: per-mountpath error counters.
		if !s.isPrometheus() && nv.NameSuffix != "" {
			s.statsdC.Send(v.label.comm+"."+nv.NameSuffix,
				1, metric{Type: statsd.Counter, Name: "count", Value: nv.Value})
		}
	default:
		debug.Assert(false, v.kind)
	}
}
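
// Usage sketch (the "mp1" suffix is hypothetical): a per-mountpath error
// counter can be transmitted to StatsD immediately, bypassing aggregation:
//
//	s.update(cos.NamedVal64{Name: ErrPutMirrorCount, NameSuffix: "mp1", Value: 1})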

// log + StatsD (Prometheus is done separately via `Collect`)
func (s *coreStats) copyT(out copyTracker, diskLowUtil ...int64) bool {
	idle := true
	intl := max(int64(s.statsTime.Seconds()), 1)
	s.sgl.Reset()
	for name, v := range s.Tracker {
		switch v.kind {
		case KindLatency:
			var lat int64
			if num := ratomic.SwapInt64(&v.numSamples, 0); num > 0 {
				lat = ratomic.SwapInt64(&v.Value, 0) / num
				if !ignore(name) {
					idle = false
				}
			}
			out[name] = copyValue{lat}
			// NOTE: ns => ms, and not reporting zeros
			millis := cos.DivRound(lat, int64(time.Millisecond))
			if !s.isPrometheus() && millis > 0 {
				s.statsdC.AppMetric(metric{Type: statsd.Timer, Name: v.label.stsd, Value: float64(millis)}, s.sgl)
			}
		case KindThroughput:
			var throughput int64
			if throughput = ratomic.SwapInt64(&v.Value, 0); throughput > 0 {
				throughput /= intl
				if !ignore(name) {
					idle = false
				}
			}
			out[name] = copyValue{throughput}
			if !s.isPrometheus() && throughput > 0 {
				fv := roundMBs(throughput)
				s.statsdC.AppMetric(metric{Type: statsd.Gauge, Name: v.label.stsd, Value: fv}, s.sgl)
			}
		case KindComputedThroughput:
			if throughput := ratomic.SwapInt64(&v.Value, 0); throughput > 0 {
				out[name] = copyValue{throughput}
				if !s.isPrometheus() {
					fv := roundMBs(throughput)
					s.statsdC.AppMetric(metric{Type: statsd.Gauge, Name: v.label.stsd, Value: fv}, s.sgl)
				}
			}
		case KindCounter, KindSize:
			var (
				val     = ratomic.LoadInt64(&v.Value)
				changed bool
			)
			if prev, ok := out[name]; !ok || prev.Value != val {
				changed = true
			}
			if val > 0 {
				out[name] = copyValue{val}
				if changed && !ignore(name) {
					idle = false
				}
			}
			// StatsD iff changed
			if !s.isPrometheus() && changed {
				if v.kind == KindCounter {
					s.statsdC.AppMetric(metric{Type: statsd.Counter, Name: v.label.stsd, Value: val}, s.sgl)
				} else {
					// KindSize: download ("dl") metrics - target-only - persist across intervals
					metricType := statsd.Counter
					if v.label.comm == "dl" {
						metricType = statsd.PersistentCounter
					}
					s.statsdC.AppMetric(metric{Type: metricType, Name: v.label.stsd, Value: float64(val)}, s.sgl)
				}
			}
		case KindGauge:
			val := ratomic.LoadInt64(&v.Value)
			out[name] = copyValue{val}
			if !s.isPrometheus() {
				s.statsdC.AppMetric(metric{Type: statsd.Gauge, Name: v.label.stsd, Value: float64(val)}, s.sgl)
			}
			if isDiskUtilMetric(name) && val > diskLowUtil[0] {
				idle = false
			}
		default:
			out[name] = copyValue{ratomic.LoadInt64(&v.Value)}
		}
	}
	if !s.isPrometheus() {
		s.statsdC.SendSGL(s.sgl)
	}
	return idle
}
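
// Interval arithmetic (example): with statsTime = 10s, a KindThroughput metric
// that accumulated 1_000_000 bytes during the interval is reported as
// 1_000_000/10 = 100_000 B/s; StatsD then receives roundMBs(100_000) = 0.1 (MB/s).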

// REST API what=stats query
// NOTE: not reporting zero counts
func (s *coreStats) copyCumulative(ctracker copyTracker) {
	for name, v := range s.Tracker {
		switch v.kind {
		case KindLatency:
			ctracker[name] = copyValue{ratomic.LoadInt64(&v.cumulative)}
		case KindThroughput:
			val := copyValue{ratomic.LoadInt64(&v.cumulative)}
			ctracker[name] = val

			// NOTE: effectively, add a same-value metric that was never added/updated
			// via `runner.Add` and friends. It is OK to replace the ".bps" suffix
			// as statsValue.cumulative _is_ the total size (aka, KindSize)
			n := name[:len(name)-3] + "size"
			ctracker[n] = val
		case KindCounter, KindSize:
			if val := ratomic.LoadInt64(&v.Value); val > 0 {
				ctracker[name] = copyValue{val}
			}
		default: // KindSpecial, KindComputedThroughput, KindGauge
			ctracker[name] = copyValue{ratomic.LoadInt64(&v.Value)}
		}
	}
}
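
// For example (assuming a throughput metric named "get.bps" is registered):
// its cumulative value is additionally reported under the derived name
// "get.size" - the total number of bytes, i.e., effectively KindSize.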

func (s *coreStats) reset(errorsOnly bool) {
	if errorsOnly {
		for name, v := range s.Tracker {
			if IsErrMetric(name) {
				debug.Assert(v.kind == KindCounter || v.kind == KindSize, name)
				ratomic.StoreInt64(&v.Value, 0)
			}
		}
		return
	}

	for _, v := range s.Tracker {
		switch v.kind {
		case KindLatency:
			ratomic.StoreInt64(&v.numSamples, 0)
			fallthrough
		case KindThroughput:
			ratomic.StoreInt64(&v.Value, 0)
			ratomic.StoreInt64(&v.cumulative, 0)
		case KindCounter, KindSize, KindComputedThroughput, KindGauge:
			ratomic.StoreInt64(&v.Value, 0)
		default: // KindSpecial - do nothing
		}
	}
}

////////////////
// statsValue //
////////////////

// interface guard
var (
	_ json.Marshaler   = (*statsValue)(nil)
	_ json.Unmarshaler = (*statsValue)(nil)
)

func (v *statsValue) MarshalJSON() ([]byte, error) {
	s := strconv.FormatInt(ratomic.LoadInt64(&v.Value), 10)
	return cos.UnsafeB(s), nil
}

func (v *statsValue) UnmarshalJSON(b []byte) error { return jsoniter.Unmarshal(b, &v.Value) }

///////////////
// copyValue //
///////////////

// interface guard
var (
	_ json.Marshaler   = (*copyValue)(nil)
	_ json.Unmarshaler = (*copyValue)(nil)
)

func (v copyValue) MarshalJSON() (b []byte, err error) { return jsoniter.Marshal(v.Value) }
func (v *copyValue) UnmarshalJSON(b []byte) error      { return jsoniter.Unmarshal(b, &v.Value) }

/////////////////
// copyTracker //
/////////////////

// serialize itself (slightly more efficiently than JSON)
func (ctracker copyTracker) write(sgl *memsys.SGL, sorted []string, target, idle bool) {
	var (
		next  bool
		disks bool // whether to write target disk metrics
	)
	if len(sorted) == 0 {
		for n := range ctracker {
			sorted = append(sorted, n)
		}
		sort.Strings(sorted)
	}
	sgl.WriteByte('{')
	for _, n := range sorted {
		v := ctracker[n]
		// exclude
		if v.Value == 0 || n == Uptime { // always skip zeros and uptime
			continue
		}
		if isDiskMetric(n) {
			if isDiskUtilMetric(n) && v.Value > minLogDiskUtil {
				disks = true // not idle - log all disk metrics
			}
			continue
		}
		if idle && n == KeepAliveLatency {
			continue
		}
		// add
		if next {
			sgl.WriteByte(',')
		}
		sgl.Write(cos.UnsafeB(n))
		sgl.WriteByte(':')
		sgl.Write(cos.UnsafeB(strconv.FormatInt(v.Value, 10))) // raw value
		next = true
	}
	if disks {
		debug.Assert(target)
		for n, v := range ctracker {
			if v.Value == 0 || !isDiskMetric(n) {
				continue
			}
			sgl.WriteByte(',')
			sgl.Write(cos.UnsafeB(n))
			sgl.WriteByte(':')
			sgl.Write(cos.UnsafeB(strconv.FormatInt(v.Value, 10))) // ditto
		}
	}
	sgl.WriteByte('}')
}
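
// Sample output (values illustrative): {get.n:42,get.ns:1250000,put.n:7} -
// note that, unlike JSON, the names are written unquoted, hence the
// "slightly more efficiently" above.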

////////////
// runner //
////////////

// interface guard
var (
	_ prometheus.Collector = (*runner)(nil)
)

func (r *runner) GetStats() *Node {
	ctracker := make(copyTracker, 48)
	r.core.copyCumulative(ctracker)
	return &Node{Tracker: ctracker}
}

func (r *runner) GetStatsV322() (out *NodeV322) {
	ds := r.GetStats()

	out = &NodeV322{}
	out.Snode = ds.Snode
	out.Tracker = ds.Tracker
	return out
}

func (r *runner) ResetStats(errorsOnly bool) {
	r.core.reset(errorsOnly)
}

func (r *runner) GetMetricNames() cos.StrKVs {
	out := make(cos.StrKVs, 32)
	for name, v := range r.core.Tracker {
		out[name] = v.kind
	}
	return out
}

// common (target, proxy) metrics
func (r *runner) regCommon(node *meta.Snode) {
	// basic counters
	r.reg(node, GetCount, KindCounter)
	r.reg(node, PutCount, KindCounter)
	r.reg(node, AppendCount, KindCounter)
	r.reg(node, DeleteCount, KindCounter)
	r.reg(node, RenameCount, KindCounter)
	r.reg(node, ListCount, KindCounter)

	// basic error counters, respectively
	r.reg(node, errPrefix+GetCount, KindCounter)
	r.reg(node, errPrefix+PutCount, KindCounter)
	r.reg(node, errPrefix+AppendCount, KindCounter)
	r.reg(node, errPrefix+DeleteCount, KindCounter)
	r.reg(node, errPrefix+RenameCount, KindCounter)
	r.reg(node, errPrefix+ListCount, KindCounter)

	// more error counters
	r.reg(node, ErrHTTPWriteCount, KindCounter)
	r.reg(node, ErrDownloadCount, KindCounter)
	r.reg(node, ErrPutMirrorCount, KindCounter)

	// latency
	r.reg(node, GetLatency, KindLatency)
	r.reg(node, ListLatency, KindLatency)
	r.reg(node, KeepAliveLatency, KindLatency)

	// special uptime
	r.reg(node, Uptime, KindSpecial)
}

// NOTE naming convention: ".n" for the count and ".ns" for duration (nanoseconds)
// compare with coreStats.initProm()
func (r *runner) reg(node *meta.Snode, name, kind string) {
	v := &statsValue{kind: kind}
	// in StatsD metrics ":" delineates the name and the value - replace with underscore
	switch kind {
	case KindCounter:
		debug.Assert(strings.HasSuffix(name, ".n"), name) // naming convention
		v.label.comm = strings.TrimSuffix(name, ".n")
		v.label.comm = strings.ReplaceAll(v.label.comm, ":", "_")
		v.label.stsd = fmt.Sprintf("%s.%s.%s.%s", "ais"+node.Type(), node.ID(), v.label.comm, "count")
	case KindSize:
		debug.Assert(strings.HasSuffix(name, ".size"), name) // naming convention
		v.label.comm = strings.TrimSuffix(name, ".size")
		v.label.comm = strings.ReplaceAll(v.label.comm, ":", "_")
		v.label.stsd = fmt.Sprintf("%s.%s.%s.%s", "ais"+node.Type(), node.ID(), v.label.comm, "mbytes")
	case KindLatency:
		debug.Assert(strings.Contains(name, ".ns"), name) // ditto
		v.label.comm = strings.TrimSuffix(name, ".ns")
		v.label.comm = strings.ReplaceAll(v.label.comm, ".ns.", ".")
		v.label.comm = strings.ReplaceAll(v.label.comm, ":", "_")
		v.label.stsd = fmt.Sprintf("%s.%s.%s.%s", "ais"+node.Type(), node.ID(), v.label.comm, "ms")
	case KindThroughput, KindComputedThroughput:
		debug.Assert(strings.HasSuffix(name, ".bps"), name) // ditto
		v.label.comm = strings.TrimSuffix(name, ".bps")
		v.label.comm = strings.ReplaceAll(v.label.comm, ":", "_")
		v.label.stsd = fmt.Sprintf("%s.%s.%s.%s", "ais"+node.Type(), node.ID(), v.label.comm, "mbps")
	default:
		debug.Assert(kind == KindGauge || kind == KindSpecial)
		v.label.comm = name
		v.label.comm = strings.ReplaceAll(v.label.comm, ":", "_")
		if name == Uptime {
			v.label.comm = strings.ReplaceAll(v.label.comm, ".ns.", ".")
			v.label.stsd = fmt.Sprintf("%s.%s.%s.%s", "ais"+node.Type(), node.ID(), v.label.comm, "seconds")
		} else {
			v.label.stsd = fmt.Sprintf("%s.%s.%s", "ais"+node.Type(), node.ID(), v.label.comm)
		}
	}
	r.core.Tracker[name] = v
}
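
// Label sketch (node ID illustrative): registering "get.ns" (KindLatency) on a
// target with ID "fqWt8081" yields label.comm = "get" and
// label.stsd = "aistarget.fqWt8081.get.ms".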

//
// as cos.StatsUpdater
//

func (r *runner) Add(name string, val int64) {
	r.core.update(cos.NamedVal64{Name: name, Value: val})
}

func (r *runner) Inc(name string) {
	r.core.update(cos.NamedVal64{Name: name, Value: 1})
}

func (r *runner) IncErr(metric string) {
	if IsErrMetric(metric) {
		r.core.update(cos.NamedVal64{Name: metric, Value: 1})
	} else { // e.g. "err." + GetCount
		r.core.update(cos.NamedVal64{Name: errPrefix + metric, Value: 1})
	}
}

func (r *runner) AddMany(nvs ...cos.NamedVal64) {
	for _, nv := range nvs {
		r.core.update(nv)
	}
}
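
// Usage sketch (illustrative; `elapsed` stands for a measured time.Duration):
// callers typically pair a counter with the corresponding latency, e.g.:
//
//	r.AddMany(
//		cos.NamedVal64{Name: GetCount, Value: 1},
//		cos.NamedVal64{Name: GetLatency, Value: int64(elapsed)},
//	)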

func (r *runner) IsPrometheus() bool { return r.core.isPrometheus() }

func (r *runner) Describe(ch chan<- *prometheus.Desc) {
	for _, desc := range r.core.promDesc {
		ch <- desc
	}
}

func (r *runner) Collect(ch chan<- prometheus.Metric) {
	if !r.StartedUp() {
		return
	}
	r.core.promRLock()
	for name, v := range r.core.Tracker {
		var (
			val int64
			fv  float64

			variableLabels []string
		)
		copyV, okc := r.ctracker[name]
		if !okc {
			continue
		}
		val = copyV.Value
		fv = float64(val)
		// 1. convert units
		switch v.kind {
		case KindCounter:
			// do nothing
		case KindSize:
			fv = float64(val)
		case KindLatency:
			millis := cos.DivRound(val, int64(time.Millisecond))
			fv = float64(millis)
		case KindThroughput:
			fv = roundMBs(val)
		default:
			if name == Uptime {
				seconds := cos.DivRound(val, int64(time.Second))
				fv = float64(seconds)
			}
		}
		// 2. convert kind
		promMetricType := prometheus.GaugeValue
		if v.kind == KindCounter || v.kind == KindSize {
			promMetricType = prometheus.CounterValue
		}
		if isDiskMetric(name) {
			var diskName string
			diskName, name = extractPromDiskMetricName(name)
			variableLabels = []string{diskName}
		}
		// 3. publish
		desc, ok := r.core.promDesc[name]
		debug.Assert(ok, name)
		m, err := prometheus.NewConstMetric(desc, promMetricType, fv, variableLabels...)
		debug.AssertNoErr(err)
		ch <- m
	}
	r.core.promRUnlock()
}

func (r *runner) Name() string { return r.name }

func (r *runner) Get(name string) (val int64) { return r.core.get(name) }

func (r *runner) _run(logger statsLogger /* Prunner or Trunner */) error {
	var (
		i, j, k time.Duration
		sleep   = startupSleep
		ticker  = time.NewTicker(sleep)

		// NOTE: the maximum time we agree to wait for r.daemon.ClusterStarted()
		config   = cmn.GCO.Get()
		deadline = config.Timeout.JoinAtStartup.D()
	)
	if logger.standingBy() {
		deadline = 24 * time.Hour
	} else if deadline == 0 {
		deadline = 2 * config.Timeout.Startup.D()
	}
waitStartup:
	for {
		select {
		case <-r.stopCh:
			ticker.Stop()
			return nil
		case <-ticker.C:
			k += sleep
			if k >= config.Periodic.StatsTime.D() {
				nlog.Flush(nlog.ActNone)
				k = 0
			}
			if r.daemon.ClusterStarted() {
				break waitStartup
			}
			if logger.standingBy() && sleep == startupSleep /*first time*/ {
				sleep = config.Periodic.StatsTime.D()
				ticker.Reset(sleep)
				deadline = time.Hour

				nlog.Infoln(r.Name() + ": standing by...")
				continue
			}
			j += sleep
			if j > deadline {
				ticker.Stop()
				return cmn.ErrStartupTimeout
			}
			i += sleep
			if i > config.Timeout.Startup.D() && !logger.standingBy() {
				nlog.Errorln(r.Name() + ": " + cmn.StartupMayTimeout)
				i = 0
			}
		}
	}
	ticker.Stop()

	config = cmn.GCO.Get()
	goMaxProcs := runtime.GOMAXPROCS(0)
	nlog.Infof("Starting %s", r.Name())
	hk.Reg(r.Name()+"-logs"+hk.NameSuffix, recycleLogs, maxLogSizeCheckTime)

	statsTime := config.Periodic.StatsTime.D() // (NOTE: not to be confused with config.Log.StatsTime)
	r.ticker = time.NewTicker(statsTime)
	r.startedUp.Store(true)
	var (
		checkNumGorHigh   int64
		startTime         = mono.NanoTime() // uptime henceforth
		lastDateTimestamp = startTime
	)
	for {
		select {
		case <-r.ticker.C:
			now := mono.NanoTime()
			config = cmn.GCO.Get()
			logger.log(now, time.Duration(now-startTime) /*uptime*/, config)
			checkNumGorHigh = _whingeGoroutines(now, checkNumGorHigh, goMaxProcs)

			if statsTime != config.Periodic.StatsTime.D() {
				statsTime = config.Periodic.StatsTime.D()
				r.ticker.Reset(statsTime)
				logger.statsTime(statsTime)
			}
			// the stats runner is now solely responsible for flushing the logs,
			// both periodically and on (OOB) demand
			flushTime := dfltPeriodicFlushTime
			if config.Log.FlushTime != 0 {
				flushTime = config.Log.FlushTime.D()
			}
			if nlog.Since() > flushTime || nlog.OOB() {
				nlog.Flush(nlog.ActNone)
			}

			now = mono.NanoTime()
			if time.Duration(now-lastDateTimestamp) > dfltPeriodicTimeStamp {
				nlog.Infoln(cos.FormatTime(time.Now(), "" /* RFC822 */) + " =============")
				lastDateTimestamp = now
			}
		case <-r.stopCh:
			r.ticker.Stop()
			return nil
		}
	}
}

func _whingeGoroutines(now, checkNumGorHigh int64, goMaxProcs int) int64 {
	var (
		ngr     = runtime.NumGoroutine()
		extreme bool
	)
	if ngr < goMaxProcs*numGorHigh {
		return 0
	}
	if ngr >= goMaxProcs*numGorExtreme {
		extreme = true
		nlog.Errorf("Extremely high number of goroutines: %d", ngr)
	}
	if checkNumGorHigh == 0 {
		checkNumGorHigh = now
	} else if time.Duration(now-checkNumGorHigh) > numGorHighCheckTime {
		if !extreme {
			nlog.Warningf("High number of goroutines: %d", ngr)
		}
		checkNumGorHigh = 0
	}
	return checkNumGorHigh
}
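
// Watermark arithmetic (example): with GOMAXPROCS = 16, the "high" threshold
// is 16*numGorHigh = 1600 goroutines and the "extreme" one is
// 16*numGorExtreme = 16000; any count below 1600 resets the check (returns 0).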

func (r *runner) StartedUp() bool { return r.startedUp.Load() }

func (r *runner) Stop(err error) {
	nlog.Infof("Stopping %s, err: %v", r.Name(), err)
	r.stopCh <- struct{}{}
	if !r.IsPrometheus() {
		r.core.statsdC.Close()
	}
	close(r.stopCh)
}

func recycleLogs() time.Duration {
	// keep total log size below the configured max
	go removeLogs(cmn.GCO.Get())
	return maxLogSizeCheckTime
}

func removeLogs(config *cmn.Config) {
	maxtotal := int64(config.Log.MaxTotal)
	dentries, err := os.ReadDir(config.LogDir)
	if err != nil {
		nlog.Errorf("GC logs: cannot read log dir %s, err: %v", config.LogDir, err)
		_ = cos.CreateDir(config.LogDir) // FIXME: (local non-containerized + kill/restart under test)
		return
	}
	for _, logtype := range logtypes {
		var tot int64
		finfos := make([]rfs.FileInfo, 0, len(dentries))
		for _, dent := range dentries {
			if dent.IsDir() || !dent.Type().IsRegular() {
				continue
			}
			if n := dent.Name(); !strings.Contains(n, ".log.") || !strings.Contains(n, logtype) {
				continue
			}
			if finfo, err := dent.Info(); err == nil {
				tot += finfo.Size()
				finfos = append(finfos, finfo)
			}
		}
		if tot > maxtotal {
			removeOlderLogs(tot, maxtotal, config.LogDir, logtype, finfos)
		}
	}
}
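
// For example (the sample name near the top of this file):
// "ais.ip-10-0-2-19.root.log.INFO.20180404-031540.2249" contains both ".log."
// and ".INFO." and, therefore, counts toward the ".INFO." total.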

func removeOlderLogs(tot, maxtotal int64, logdir, logtype string, filteredInfos []rfs.FileInfo) {
	const prefix = "GC logs"
	l := len(filteredInfos)
	if l <= 1 {
		nlog.Warningf("%s: cannot cleanup %s, dir %s, tot %d, max %d", prefix, logtype, logdir, tot, maxtotal)
		return
	}
	fiLess := func(i, j int) bool {
		return filteredInfos[i].ModTime().Before(filteredInfos[j].ModTime())
	}

	verbose := cmn.Rom.FastV(4, cos.SmoduleStats)
	if verbose {
		nlog.Infoln(prefix + ": started")
	}
	sort.Slice(filteredInfos, fiLess)
	filteredInfos = filteredInfos[:l-1] // except the last = current
	for _, logfi := range filteredInfos {
		logfqn := filepath.Join(logdir, logfi.Name())
		if err := cos.RemoveFile(logfqn); err == nil {
			tot -= logfi.Size()
			if verbose {
				nlog.Infof("%s: removed %s", prefix, logfqn)
			}
			if tot < maxtotal {
				break
			}
		} else {
			nlog.Errorf("%s: failed to remove %s", prefix, logfqn)
		}
	}
	if verbose {
		nlog.Infoln(prefix + ": done")
	}
}

// debug.NewExpvar & debug.SetExpvar to visualize:
// * all counters including errors
// * latencies including keepalive
// * mountpath capacities
// * mountpath (disk) utilizations
// * total number of goroutines, etc.
// (access via host:port/debug/vars in debug mode)

// reusable sgl => (udp) => StatsD