github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/instrument/extended.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package instrument
    22  
    23  import (
    24  	"fmt"
    25  	"runtime"
    26  	"strings"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	xerrors "github.com/m3db/m3/src/x/errors"
    31  
    32  	"github.com/uber-go/tally"
    33  )
    34  
    35  // ExtendedMetricsType is a type of extended metrics to report.
    36  type ExtendedMetricsType int
    37  
    38  const (
    39  	// NoExtendedMetrics describes no extended metrics.
    40  	NoExtendedMetrics ExtendedMetricsType = iota
    41  
    42  	// SimpleExtendedMetrics describes just a simple level of extended metrics:
    43  	// - number of active goroutines
    44  	// - number of configured gomaxprocs
    45  	SimpleExtendedMetrics
    46  
    47  	// ModerateExtendedMetrics describes a moderately verbose level of extended metrics:
    48  	// - number of active goroutines
    49  	// - number of configured gomaxprocs
    50  	// - number of file descriptors
    51  	ModerateExtendedMetrics
    52  
    53  	// DetailedExtendedMetrics describes a detailed level of extended metrics:
    54  	// - number of active goroutines
    55  	// - number of configured gomaxprocs
    56  	// - number of file descriptors
    57  	// - memory allocated running count
    58  	// - memory used by heap
    59  	// - memory used by heap that is idle
    60  	// - memory used by heap that is in use
    61  	// - memory used by stack
    62  	// - number of garbage collections
    63  	// - GC pause times
    64  	DetailedExtendedMetrics
    65  
    66  	// DetailedGoRuntimeMetrics reports all detailed metrics, sans FD metrics to save CPU
    67  	// if in-use file descriptors are measured by an external system, like cAdvisor.
    68  	DetailedGoRuntimeMetrics
    69  
    70  	// DefaultExtendedMetricsType is the default extended metrics level.
    71  	DefaultExtendedMetricsType = SimpleExtendedMetrics
    72  )
    73  
    74  var (
    75  	validExtendedMetricsTypes = []ExtendedMetricsType{
    76  		NoExtendedMetrics,
    77  		SimpleExtendedMetrics,
    78  		ModerateExtendedMetrics,
    79  		DetailedExtendedMetrics,
    80  		DetailedGoRuntimeMetrics,
    81  	}
    82  )
    83  
    84  func (t ExtendedMetricsType) String() string {
    85  	switch t {
    86  	case NoExtendedMetrics:
    87  		return "none"
    88  	case SimpleExtendedMetrics:
    89  		return "simple"
    90  	case ModerateExtendedMetrics:
    91  		return "moderate"
    92  	case DetailedExtendedMetrics:
    93  		return "detailed"
    94  	case DetailedGoRuntimeMetrics:
    95  		return "runtime"
    96  	}
    97  	return "unknown"
    98  }
    99  
   100  // MarshalYAML marshals an ExtendedMetricsType.
   101  func (t *ExtendedMetricsType) MarshalYAML() (interface{}, error) {
   102  	return t.String(), nil
   103  }
   104  
   105  // UnmarshalYAML unmarshals an ExtendedMetricsType into a valid type from string.
   106  func (t *ExtendedMetricsType) UnmarshalYAML(unmarshal func(interface{}) error) error {
   107  	var str string
   108  	if err := unmarshal(&str); err != nil {
   109  		return err
   110  	}
   111  	if str == "" {
   112  		*t = DefaultExtendedMetricsType
   113  		return nil
   114  	}
   115  	strs := make([]string, 0, len(validExtendedMetricsTypes))
   116  	for _, valid := range validExtendedMetricsTypes {
   117  		if str == valid.String() {
   118  			*t = valid
   119  			return nil
   120  		}
   121  		strs = append(strs, "'"+valid.String()+"'")
   122  	}
   123  	return fmt.Errorf("invalid ExtendedMetricsType '%s' valid types are: %s",
   124  		str, strings.Join(strs, ", "))
   125  }
   126  
   127  // StartReportingExtendedMetrics creates a extend metrics reporter and starts
   128  // the reporter returning it so it may be stopped if successfully started.
   129  func StartReportingExtendedMetrics(
   130  	scope tally.Scope,
   131  	reportInterval time.Duration,
   132  	metricsType ExtendedMetricsType,
   133  ) (Reporter, error) {
   134  	reporter := NewExtendedMetricsReporter(scope, reportInterval, metricsType)
   135  	if err := reporter.Start(); err != nil {
   136  		return nil, err
   137  	}
   138  	return reporter, nil
   139  }
   140  
// runtimeMetrics holds the instruments written by report together with the
// GC count seen on the previous report. The instruments are assigned by
// NewExtendedMetricsReporter; which of them are set depends on the metrics
// level requested there.
type runtimeMetrics struct {
	NumGoRoutines   tally.Gauge   // runtime.NumGoroutine()
	GoMaxProcs      tally.Gauge   // runtime.GOMAXPROCS(0), i.e. the current setting
	MemoryAllocated tally.Gauge   // runtime.MemStats.Alloc
	MemoryHeap      tally.Gauge   // runtime.MemStats.HeapAlloc
	MemoryHeapIdle  tally.Gauge   // runtime.MemStats.HeapIdle
	MemoryHeapInuse tally.Gauge   // runtime.MemStats.HeapInuse
	MemoryStack     tally.Gauge   // runtime.MemStats.StackInuse
	GCCPUFraction   tally.Gauge   // runtime.MemStats.GCCPUFraction
	NumGC           tally.Counter // incremented by the number of GCs since the previous report
	GcPauseMs       tally.Timer   // one sample per GC pause, recorded as a time.Duration built from MemStats.PauseNs
	lastNumGC       uint32        // MemStats.NumGC as of the previous report; exchanged with atomic.SwapUint32
}
   154  
   155  func (r *runtimeMetrics) report(metricsType ExtendedMetricsType) {
   156  	if metricsType == NoExtendedMetrics {
   157  		return
   158  	}
   159  
   160  	r.NumGoRoutines.Update(float64(runtime.NumGoroutine()))
   161  	r.GoMaxProcs.Update(float64(runtime.GOMAXPROCS(0)))
   162  	if metricsType < DetailedExtendedMetrics {
   163  		return
   164  	}
   165  
   166  	var memStats runtime.MemStats
   167  	runtime.ReadMemStats(&memStats)
   168  	r.MemoryAllocated.Update(float64(memStats.Alloc))
   169  	r.MemoryHeap.Update(float64(memStats.HeapAlloc))
   170  	r.MemoryHeapIdle.Update(float64(memStats.HeapIdle))
   171  	r.MemoryHeapInuse.Update(float64(memStats.HeapInuse))
   172  	r.MemoryStack.Update(float64(memStats.StackInuse))
   173  	r.GCCPUFraction.Update(memStats.GCCPUFraction)
   174  
   175  	// memStats.NumGC is a perpetually incrementing counter (unless it wraps at 2^32).
   176  	num := memStats.NumGC
   177  	lastNum := atomic.SwapUint32(&r.lastNumGC, num)
   178  	if delta := num - lastNum; delta > 0 {
   179  		r.NumGC.Inc(int64(delta))
   180  		if delta > 255 {
   181  			// too many GCs happened, the timestamps buffer got wrapped around. Report only the last 256.
   182  			lastNum = num - 256
   183  		}
   184  		for i := lastNum; i != num; i++ {
   185  			pause := memStats.PauseNs[i%256]
   186  			r.GcPauseMs.Record(time.Duration(pause))
   187  		}
   188  	}
   189  }
   190  
// extendedMetricsReporter is the Reporter built by NewExtendedMetricsReporter.
// It combines the runtime metrics reported through the embedded baseReporter
// with an optional, separately started process reporter.
type extendedMetricsReporter struct {
	// baseReporter is initialised by NewExtendedMetricsReporter via
	// init(reportInterval, fn) with a callback that reports the runtime
	// metrics. Its Start and Stop are invoked from this type's Start and Stop.
	baseReporter
	// processReporter is set only for ModerateExtendedMetrics and
	// DetailedExtendedMetrics and is nil otherwise; when set it is started
	// and stopped together with the base reporter.
	processReporter Reporter

	metricsType ExtendedMetricsType // level chosen at construction; passed to runtime.report on every callback
	runtime     runtimeMetrics      // instruments and GC bookkeeping used by the callback
}
   198  
   199  // NewExtendedMetricsReporter creates a new extended metrics reporter
   200  // that reports runtime and process metrics.
   201  func NewExtendedMetricsReporter(
   202  	scope tally.Scope,
   203  	reportInterval time.Duration,
   204  	metricsType ExtendedMetricsType,
   205  ) Reporter {
   206  	var (
   207  		r                     = new(extendedMetricsReporter)
   208  		enableProcessReporter bool
   209  	)
   210  
   211  	r.metricsType = metricsType
   212  	r.init(reportInterval, func() {
   213  		r.runtime.report(r.metricsType)
   214  	})
   215  
   216  	if r.metricsType == NoExtendedMetrics {
   217  		return r
   218  	}
   219  
   220  	switch r.metricsType {
   221  	case ModerateExtendedMetrics:
   222  		enableProcessReporter = true
   223  	case DetailedExtendedMetrics:
   224  		enableProcessReporter = true
   225  	default:
   226  		enableProcessReporter = false
   227  	}
   228  
   229  	if enableProcessReporter {
   230  		// ProcessReporter can be quite slow in some situations (specifically
   231  		// counting FDs for processes that have many of them) so it runs on
   232  		// its own report loop.
   233  		r.processReporter = NewProcessReporter(scope, reportInterval)
   234  	}
   235  
   236  	runtimeScope := scope.SubScope("runtime")
   237  	r.runtime.NumGoRoutines = runtimeScope.Gauge("num-goroutines")
   238  	r.runtime.GoMaxProcs = runtimeScope.Gauge("gomaxprocs")
   239  	if r.metricsType < DetailedExtendedMetrics {
   240  		return r
   241  	}
   242  
   243  	var memstats runtime.MemStats
   244  	runtime.ReadMemStats(&memstats)
   245  	memoryScope := runtimeScope.SubScope("memory")
   246  	r.runtime.MemoryAllocated = memoryScope.Gauge("allocated")
   247  	r.runtime.MemoryHeap = memoryScope.Gauge("heap")
   248  	r.runtime.MemoryHeapIdle = memoryScope.Gauge("heapidle")
   249  	r.runtime.MemoryHeapInuse = memoryScope.Gauge("heapinuse")
   250  	r.runtime.MemoryStack = memoryScope.Gauge("stack")
   251  	r.runtime.GCCPUFraction = memoryScope.Gauge("gc-cpu-fraction")
   252  	r.runtime.NumGC = memoryScope.Counter("num-gc")
   253  	r.runtime.GcPauseMs = memoryScope.Timer("gc-pause-ms")
   254  	r.runtime.lastNumGC = memstats.NumGC
   255  
   256  	return r
   257  }
   258  
   259  func (e *extendedMetricsReporter) Start() error {
   260  	if err := e.baseReporter.Start(); err != nil {
   261  		return err
   262  	}
   263  
   264  	if e.processReporter != nil {
   265  		if err := e.processReporter.Start(); err != nil {
   266  			return err
   267  		}
   268  	}
   269  
   270  	return nil
   271  }
   272  
   273  func (e *extendedMetricsReporter) Stop() error {
   274  	multiErr := xerrors.NewMultiError()
   275  
   276  	if err := e.baseReporter.Stop(); err != nil {
   277  		multiErr = multiErr.Add(err)
   278  	}
   279  
   280  	if e.processReporter != nil {
   281  		if err := e.processReporter.Stop(); err != nil {
   282  			multiErr = multiErr.Add(err)
   283  		}
   284  	}
   285  
   286  	return multiErr.FinalError()
   287  }