github.com/m3db/m3@v1.5.0/src/x/instrument/extended.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package instrument
    22  
    23  import (
    24  	"fmt"
    25  	"runtime"
    26  	"strings"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	xerrors "github.com/m3db/m3/src/x/errors"
    31  
    32  	"github.com/uber-go/tally"
    33  )
    34  
    35  // ExtendedMetricsType is a type of extended metrics to report.
    36  type ExtendedMetricsType int
    37  
    38  const (
    39  	// NoExtendedMetrics describes no extended metrics.
    40  	NoExtendedMetrics ExtendedMetricsType = iota
    41  
    42  	// SimpleExtendedMetrics describes just a simple level of extended metrics:
    43  	// - number of active goroutines
    44  	// - number of configured gomaxprocs
    45  	SimpleExtendedMetrics
    46  
    47  	// ModerateExtendedMetrics describes a moderately verbose level of extended metrics:
    48  	// - number of active goroutines
    49  	// - number of configured gomaxprocs
    50  	// - number of file descriptors
    51  	ModerateExtendedMetrics
    52  
    53  	// DetailedExtendedMetrics describes a detailed level of extended metrics:
    54  	// - number of active goroutines
    55  	// - number of configured gomaxprocs
    56  	// - number of file descriptors
    57  	// - memory allocated running count
    58  	// - memory used by heap
    59  	// - memory used by heap that is idle
    60  	// - memory used by heap that is in use
    61  	// - memory used by stack
    62  	// - number of garbage collections
    63  	// - GC pause times
    64  	DetailedExtendedMetrics
    65  
    66  	// DetailedGoRuntimeMetrics reports all detailed metrics, sans FD metrics to save CPU
    67  	// if in-use file descriptors are measured by an external system, like cAdvisor.
    68  	DetailedGoRuntimeMetrics
    69  
    70  	// DefaultExtendedMetricsType is the default extended metrics level.
    71  	DefaultExtendedMetricsType = SimpleExtendedMetrics
    72  )
    73  
    74  var (
    75  	validExtendedMetricsTypes = []ExtendedMetricsType{
    76  		NoExtendedMetrics,
    77  		SimpleExtendedMetrics,
    78  		ModerateExtendedMetrics,
    79  		DetailedExtendedMetrics,
    80  		DetailedGoRuntimeMetrics,
    81  	}
    82  )
    83  
    84  func (t ExtendedMetricsType) String() string {
    85  	switch t {
    86  	case NoExtendedMetrics:
    87  		return "none"
    88  	case SimpleExtendedMetrics:
    89  		return "simple"
    90  	case ModerateExtendedMetrics:
    91  		return "moderate"
    92  	case DetailedExtendedMetrics:
    93  		return "detailed"
    94  	case DetailedGoRuntimeMetrics:
    95  		return "runtime"
    96  	}
    97  	return "unknown"
    98  }
    99  
   100  // UnmarshalYAML unmarshals an ExtendedMetricsType into a valid type from string.
   101  func (t *ExtendedMetricsType) UnmarshalYAML(unmarshal func(interface{}) error) error {
   102  	var str string
   103  	if err := unmarshal(&str); err != nil {
   104  		return err
   105  	}
   106  	if str == "" {
   107  		*t = DefaultExtendedMetricsType
   108  		return nil
   109  	}
   110  	strs := make([]string, 0, len(validExtendedMetricsTypes))
   111  	for _, valid := range validExtendedMetricsTypes {
   112  		if str == valid.String() {
   113  			*t = valid
   114  			return nil
   115  		}
   116  		strs = append(strs, "'"+valid.String()+"'")
   117  	}
   118  	return fmt.Errorf("invalid ExtendedMetricsType '%s' valid types are: %s",
   119  		str, strings.Join(strs, ", "))
   120  }
   121  
   122  // StartReportingExtendedMetrics creates a extend metrics reporter and starts
   123  // the reporter returning it so it may be stopped if successfully started.
   124  func StartReportingExtendedMetrics(
   125  	scope tally.Scope,
   126  	reportInterval time.Duration,
   127  	metricsType ExtendedMetricsType,
   128  ) (Reporter, error) {
   129  	reporter := NewExtendedMetricsReporter(scope, reportInterval, metricsType)
   130  	if err := reporter.Start(); err != nil {
   131  		return nil, err
   132  	}
   133  	return reporter, nil
   134  }
   135  
// runtimeMetrics groups the tally instruments that report writes Go runtime
// statistics to, together with the GC count observed by the previous report.
//
// NumGoRoutines and GoMaxProcs are updated for every level other than
// NoExtendedMetrics. The remaining instruments are only created and updated
// for DetailedExtendedMetrics and above, and are fed from runtime.MemStats:
// MemoryAllocated (Alloc), MemoryHeap (HeapAlloc), MemoryHeapIdle (HeapIdle),
// MemoryHeapInuse (HeapInuse), MemoryStack (StackInuse), GCCPUFraction,
// NumGC (incremented by the number of GCs since the last report) and
// GcPauseMs (one recorded duration per GC pause since the last report).
//
// lastNumGC holds the MemStats.NumGC value seen at the previous report and is
// swapped atomically by report.
type runtimeMetrics struct {
	NumGoRoutines   tally.Gauge
	GoMaxProcs      tally.Gauge
	MemoryAllocated tally.Gauge
	MemoryHeap      tally.Gauge
	MemoryHeapIdle  tally.Gauge
	MemoryHeapInuse tally.Gauge
	MemoryStack     tally.Gauge
	GCCPUFraction   tally.Gauge
	NumGC           tally.Counter
	GcPauseMs       tally.Timer
	lastNumGC       uint32
}
   149  
   150  func (r *runtimeMetrics) report(metricsType ExtendedMetricsType) {
   151  	if metricsType == NoExtendedMetrics {
   152  		return
   153  	}
   154  
   155  	r.NumGoRoutines.Update(float64(runtime.NumGoroutine()))
   156  	r.GoMaxProcs.Update(float64(runtime.GOMAXPROCS(0)))
   157  	if metricsType < DetailedExtendedMetrics {
   158  		return
   159  	}
   160  
   161  	var memStats runtime.MemStats
   162  	runtime.ReadMemStats(&memStats)
   163  	r.MemoryAllocated.Update(float64(memStats.Alloc))
   164  	r.MemoryHeap.Update(float64(memStats.HeapAlloc))
   165  	r.MemoryHeapIdle.Update(float64(memStats.HeapIdle))
   166  	r.MemoryHeapInuse.Update(float64(memStats.HeapInuse))
   167  	r.MemoryStack.Update(float64(memStats.StackInuse))
   168  	r.GCCPUFraction.Update(memStats.GCCPUFraction)
   169  
   170  	// memStats.NumGC is a perpetually incrementing counter (unless it wraps at 2^32).
   171  	num := memStats.NumGC
   172  	lastNum := atomic.SwapUint32(&r.lastNumGC, num)
   173  	if delta := num - lastNum; delta > 0 {
   174  		r.NumGC.Inc(int64(delta))
   175  		if delta > 255 {
   176  			// too many GCs happened, the timestamps buffer got wrapped around. Report only the last 256.
   177  			lastNum = num - 256
   178  		}
   179  		for i := lastNum; i != num; i++ {
   180  			pause := memStats.PauseNs[i%256]
   181  			r.GcPauseMs.Record(time.Duration(pause))
   182  		}
   183  	}
   184  }
   185  
// extendedMetricsReporter reports Go runtime metrics on the embedded
// baseReporter's loop and, for some levels, delegates process metrics to a
// separate process reporter.
//
// processReporter is only set by NewExtendedMetricsReporter for
// ModerateExtendedMetrics and DetailedExtendedMetrics; it is nil otherwise.
// metricsType is the level handed to runtime.report on every tick, and
// runtime holds the instruments that report writes to.
type extendedMetricsReporter struct {
	baseReporter
	processReporter Reporter

	metricsType ExtendedMetricsType
	runtime     runtimeMetrics
}
   193  
   194  // NewExtendedMetricsReporter creates a new extended metrics reporter
   195  // that reports runtime and process metrics.
   196  func NewExtendedMetricsReporter(
   197  	scope tally.Scope,
   198  	reportInterval time.Duration,
   199  	metricsType ExtendedMetricsType,
   200  ) Reporter {
   201  	var (
   202  		r                     = new(extendedMetricsReporter)
   203  		enableProcessReporter bool
   204  	)
   205  
   206  	r.metricsType = metricsType
   207  	r.init(reportInterval, func() {
   208  		r.runtime.report(r.metricsType)
   209  	})
   210  
   211  	if r.metricsType == NoExtendedMetrics {
   212  		return r
   213  	}
   214  
   215  	switch r.metricsType {
   216  	case ModerateExtendedMetrics:
   217  		enableProcessReporter = true
   218  	case DetailedExtendedMetrics:
   219  		enableProcessReporter = true
   220  	default:
   221  		enableProcessReporter = false
   222  	}
   223  
   224  	if enableProcessReporter {
   225  		// ProcessReporter can be quite slow in some situations (specifically
   226  		// counting FDs for processes that have many of them) so it runs on
   227  		// its own report loop.
   228  		r.processReporter = NewProcessReporter(scope, reportInterval)
   229  	}
   230  
   231  	runtimeScope := scope.SubScope("runtime")
   232  	r.runtime.NumGoRoutines = runtimeScope.Gauge("num-goroutines")
   233  	r.runtime.GoMaxProcs = runtimeScope.Gauge("gomaxprocs")
   234  	if r.metricsType < DetailedExtendedMetrics {
   235  		return r
   236  	}
   237  
   238  	var memstats runtime.MemStats
   239  	runtime.ReadMemStats(&memstats)
   240  	memoryScope := runtimeScope.SubScope("memory")
   241  	r.runtime.MemoryAllocated = memoryScope.Gauge("allocated")
   242  	r.runtime.MemoryHeap = memoryScope.Gauge("heap")
   243  	r.runtime.MemoryHeapIdle = memoryScope.Gauge("heapidle")
   244  	r.runtime.MemoryHeapInuse = memoryScope.Gauge("heapinuse")
   245  	r.runtime.MemoryStack = memoryScope.Gauge("stack")
   246  	r.runtime.GCCPUFraction = memoryScope.Gauge("gc-cpu-fraction")
   247  	r.runtime.NumGC = memoryScope.Counter("num-gc")
   248  	r.runtime.GcPauseMs = memoryScope.Timer("gc-pause-ms")
   249  	r.runtime.lastNumGC = memstats.NumGC
   250  
   251  	return r
   252  }
   253  
   254  func (e *extendedMetricsReporter) Start() error {
   255  	if err := e.baseReporter.Start(); err != nil {
   256  		return err
   257  	}
   258  
   259  	if e.processReporter != nil {
   260  		if err := e.processReporter.Start(); err != nil {
   261  			return err
   262  		}
   263  	}
   264  
   265  	return nil
   266  }
   267  
   268  func (e *extendedMetricsReporter) Stop() error {
   269  	multiErr := xerrors.NewMultiError()
   270  
   271  	if err := e.baseReporter.Stop(); err != nil {
   272  		multiErr = multiErr.Add(err)
   273  	}
   274  
   275  	if e.processReporter != nil {
   276  		if err := e.processReporter.Stop(); err != nil {
   277  			multiErr = multiErr.Add(err)
   278  		}
   279  	}
   280  
   281  	return multiErr.FinalError()
   282  }