github.com/m3db/m3@v1.5.0/src/x/instrument/extended.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package instrument 22 23 import ( 24 "fmt" 25 "runtime" 26 "strings" 27 "sync/atomic" 28 "time" 29 30 xerrors "github.com/m3db/m3/src/x/errors" 31 32 "github.com/uber-go/tally" 33 ) 34 35 // ExtendedMetricsType is a type of extended metrics to report. 36 type ExtendedMetricsType int 37 38 const ( 39 // NoExtendedMetrics describes no extended metrics. 40 NoExtendedMetrics ExtendedMetricsType = iota 41 42 // SimpleExtendedMetrics describes just a simple level of extended metrics: 43 // - number of active goroutines 44 // - number of configured gomaxprocs 45 SimpleExtendedMetrics 46 47 // ModerateExtendedMetrics describes a moderately verbose level of extended metrics: 48 // - number of active goroutines 49 // - number of configured gomaxprocs 50 // - number of file descriptors 51 ModerateExtendedMetrics 52 53 // DetailedExtendedMetrics describes a detailed level of extended metrics: 54 // - number of active goroutines 55 // - number of configured gomaxprocs 56 // - number of file descriptors 57 // - memory allocated running count 58 // - memory used by heap 59 // - memory used by heap that is idle 60 // - memory used by heap that is in use 61 // - memory used by stack 62 // - number of garbage collections 63 // - GC pause times 64 DetailedExtendedMetrics 65 66 // DetailedGoRuntimeMetrics reports all detailed metrics, sans FD metrics to save CPU 67 // if in-use file descriptors are measured by an external system, like cAdvisor. 68 DetailedGoRuntimeMetrics 69 70 // DefaultExtendedMetricsType is the default extended metrics level. 71 DefaultExtendedMetricsType = SimpleExtendedMetrics 72 ) 73 74 var ( 75 validExtendedMetricsTypes = []ExtendedMetricsType{ 76 NoExtendedMetrics, 77 SimpleExtendedMetrics, 78 ModerateExtendedMetrics, 79 DetailedExtendedMetrics, 80 DetailedGoRuntimeMetrics, 81 } 82 ) 83 84 func (t ExtendedMetricsType) String() string { 85 switch t { 86 case NoExtendedMetrics: 87 return "none" 88 case SimpleExtendedMetrics: 89 return "simple" 90 case ModerateExtendedMetrics: 91 return "moderate" 92 case DetailedExtendedMetrics: 93 return "detailed" 94 case DetailedGoRuntimeMetrics: 95 return "runtime" 96 } 97 return "unknown" 98 } 99 100 // UnmarshalYAML unmarshals an ExtendedMetricsType into a valid type from string. 101 func (t *ExtendedMetricsType) UnmarshalYAML(unmarshal func(interface{}) error) error { 102 var str string 103 if err := unmarshal(&str); err != nil { 104 return err 105 } 106 if str == "" { 107 *t = DefaultExtendedMetricsType 108 return nil 109 } 110 strs := make([]string, 0, len(validExtendedMetricsTypes)) 111 for _, valid := range validExtendedMetricsTypes { 112 if str == valid.String() { 113 *t = valid 114 return nil 115 } 116 strs = append(strs, "'"+valid.String()+"'") 117 } 118 return fmt.Errorf("invalid ExtendedMetricsType '%s' valid types are: %s", 119 str, strings.Join(strs, ", ")) 120 } 121 122 // StartReportingExtendedMetrics creates a extend metrics reporter and starts 123 // the reporter returning it so it may be stopped if successfully started. 124 func StartReportingExtendedMetrics( 125 scope tally.Scope, 126 reportInterval time.Duration, 127 metricsType ExtendedMetricsType, 128 ) (Reporter, error) { 129 reporter := NewExtendedMetricsReporter(scope, reportInterval, metricsType) 130 if err := reporter.Start(); err != nil { 131 return nil, err 132 } 133 return reporter, nil 134 } 135 136 type runtimeMetrics struct { 137 NumGoRoutines tally.Gauge 138 GoMaxProcs tally.Gauge 139 MemoryAllocated tally.Gauge 140 MemoryHeap tally.Gauge 141 MemoryHeapIdle tally.Gauge 142 MemoryHeapInuse tally.Gauge 143 MemoryStack tally.Gauge 144 GCCPUFraction tally.Gauge 145 NumGC tally.Counter 146 GcPauseMs tally.Timer 147 lastNumGC uint32 148 } 149 150 func (r *runtimeMetrics) report(metricsType ExtendedMetricsType) { 151 if metricsType == NoExtendedMetrics { 152 return 153 } 154 155 r.NumGoRoutines.Update(float64(runtime.NumGoroutine())) 156 r.GoMaxProcs.Update(float64(runtime.GOMAXPROCS(0))) 157 if metricsType < DetailedExtendedMetrics { 158 return 159 } 160 161 var memStats runtime.MemStats 162 runtime.ReadMemStats(&memStats) 163 r.MemoryAllocated.Update(float64(memStats.Alloc)) 164 r.MemoryHeap.Update(float64(memStats.HeapAlloc)) 165 r.MemoryHeapIdle.Update(float64(memStats.HeapIdle)) 166 r.MemoryHeapInuse.Update(float64(memStats.HeapInuse)) 167 r.MemoryStack.Update(float64(memStats.StackInuse)) 168 r.GCCPUFraction.Update(memStats.GCCPUFraction) 169 170 // memStats.NumGC is a perpetually incrementing counter (unless it wraps at 2^32). 171 num := memStats.NumGC 172 lastNum := atomic.SwapUint32(&r.lastNumGC, num) 173 if delta := num - lastNum; delta > 0 { 174 r.NumGC.Inc(int64(delta)) 175 if delta > 255 { 176 // too many GCs happened, the timestamps buffer got wrapped around. Report only the last 256. 177 lastNum = num - 256 178 } 179 for i := lastNum; i != num; i++ { 180 pause := memStats.PauseNs[i%256] 181 r.GcPauseMs.Record(time.Duration(pause)) 182 } 183 } 184 } 185 186 type extendedMetricsReporter struct { 187 baseReporter 188 processReporter Reporter 189 190 metricsType ExtendedMetricsType 191 runtime runtimeMetrics 192 } 193 194 // NewExtendedMetricsReporter creates a new extended metrics reporter 195 // that reports runtime and process metrics. 196 func NewExtendedMetricsReporter( 197 scope tally.Scope, 198 reportInterval time.Duration, 199 metricsType ExtendedMetricsType, 200 ) Reporter { 201 var ( 202 r = new(extendedMetricsReporter) 203 enableProcessReporter bool 204 ) 205 206 r.metricsType = metricsType 207 r.init(reportInterval, func() { 208 r.runtime.report(r.metricsType) 209 }) 210 211 if r.metricsType == NoExtendedMetrics { 212 return r 213 } 214 215 switch r.metricsType { 216 case ModerateExtendedMetrics: 217 enableProcessReporter = true 218 case DetailedExtendedMetrics: 219 enableProcessReporter = true 220 default: 221 enableProcessReporter = false 222 } 223 224 if enableProcessReporter { 225 // ProcessReporter can be quite slow in some situations (specifically 226 // counting FDs for processes that have many of them) so it runs on 227 // its own report loop. 228 r.processReporter = NewProcessReporter(scope, reportInterval) 229 } 230 231 runtimeScope := scope.SubScope("runtime") 232 r.runtime.NumGoRoutines = runtimeScope.Gauge("num-goroutines") 233 r.runtime.GoMaxProcs = runtimeScope.Gauge("gomaxprocs") 234 if r.metricsType < DetailedExtendedMetrics { 235 return r 236 } 237 238 var memstats runtime.MemStats 239 runtime.ReadMemStats(&memstats) 240 memoryScope := runtimeScope.SubScope("memory") 241 r.runtime.MemoryAllocated = memoryScope.Gauge("allocated") 242 r.runtime.MemoryHeap = memoryScope.Gauge("heap") 243 r.runtime.MemoryHeapIdle = memoryScope.Gauge("heapidle") 244 r.runtime.MemoryHeapInuse = memoryScope.Gauge("heapinuse") 245 r.runtime.MemoryStack = memoryScope.Gauge("stack") 246 r.runtime.GCCPUFraction = memoryScope.Gauge("gc-cpu-fraction") 247 r.runtime.NumGC = memoryScope.Counter("num-gc") 248 r.runtime.GcPauseMs = memoryScope.Timer("gc-pause-ms") 249 r.runtime.lastNumGC = memstats.NumGC 250 251 return r 252 } 253 254 func (e *extendedMetricsReporter) Start() error { 255 if err := e.baseReporter.Start(); err != nil { 256 return err 257 } 258 259 if e.processReporter != nil { 260 if err := e.processReporter.Start(); err != nil { 261 return err 262 } 263 } 264 265 return nil 266 } 267 268 func (e *extendedMetricsReporter) Stop() error { 269 multiErr := xerrors.NewMultiError() 270 271 if err := e.baseReporter.Stop(); err != nil { 272 multiErr = multiErr.Add(err) 273 } 274 275 if e.processReporter != nil { 276 if err := e.processReporter.Stop(); err != nil { 277 multiErr = multiErr.Add(err) 278 } 279 } 280 281 return multiErr.FinalError() 282 }