github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/instrument/extended.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package instrument 22 23 import ( 24 "fmt" 25 "runtime" 26 "strings" 27 "sync/atomic" 28 "time" 29 30 xerrors "github.com/m3db/m3/src/x/errors" 31 32 "github.com/uber-go/tally" 33 ) 34 35 // ExtendedMetricsType is a type of extended metrics to report. 36 type ExtendedMetricsType int 37 38 const ( 39 // NoExtendedMetrics describes no extended metrics. 40 NoExtendedMetrics ExtendedMetricsType = iota 41 42 // SimpleExtendedMetrics describes just a simple level of extended metrics: 43 // - number of active goroutines 44 // - number of configured gomaxprocs 45 SimpleExtendedMetrics 46 47 // ModerateExtendedMetrics describes a moderately verbose level of extended metrics: 48 // - number of active goroutines 49 // - number of configured gomaxprocs 50 // - number of file descriptors 51 ModerateExtendedMetrics 52 53 // DetailedExtendedMetrics describes a detailed level of extended metrics: 54 // - number of active goroutines 55 // - number of configured gomaxprocs 56 // - number of file descriptors 57 // - memory allocated running count 58 // - memory used by heap 59 // - memory used by heap that is idle 60 // - memory used by heap that is in use 61 // - memory used by stack 62 // - number of garbage collections 63 // - GC pause times 64 DetailedExtendedMetrics 65 66 // DetailedGoRuntimeMetrics reports all detailed metrics, sans FD metrics to save CPU 67 // if in-use file descriptors are measured by an external system, like cAdvisor. 68 DetailedGoRuntimeMetrics 69 70 // DefaultExtendedMetricsType is the default extended metrics level. 71 DefaultExtendedMetricsType = SimpleExtendedMetrics 72 ) 73 74 var ( 75 validExtendedMetricsTypes = []ExtendedMetricsType{ 76 NoExtendedMetrics, 77 SimpleExtendedMetrics, 78 ModerateExtendedMetrics, 79 DetailedExtendedMetrics, 80 DetailedGoRuntimeMetrics, 81 } 82 ) 83 84 func (t ExtendedMetricsType) String() string { 85 switch t { 86 case NoExtendedMetrics: 87 return "none" 88 case SimpleExtendedMetrics: 89 return "simple" 90 case ModerateExtendedMetrics: 91 return "moderate" 92 case DetailedExtendedMetrics: 93 return "detailed" 94 case DetailedGoRuntimeMetrics: 95 return "runtime" 96 } 97 return "unknown" 98 } 99 100 // MarshalYAML marshals an ExtendedMetricsType. 101 func (t *ExtendedMetricsType) MarshalYAML() (interface{}, error) { 102 return t.String(), nil 103 } 104 105 // UnmarshalYAML unmarshals an ExtendedMetricsType into a valid type from string. 106 func (t *ExtendedMetricsType) UnmarshalYAML(unmarshal func(interface{}) error) error { 107 var str string 108 if err := unmarshal(&str); err != nil { 109 return err 110 } 111 if str == "" { 112 *t = DefaultExtendedMetricsType 113 return nil 114 } 115 strs := make([]string, 0, len(validExtendedMetricsTypes)) 116 for _, valid := range validExtendedMetricsTypes { 117 if str == valid.String() { 118 *t = valid 119 return nil 120 } 121 strs = append(strs, "'"+valid.String()+"'") 122 } 123 return fmt.Errorf("invalid ExtendedMetricsType '%s' valid types are: %s", 124 str, strings.Join(strs, ", ")) 125 } 126 127 // StartReportingExtendedMetrics creates a extend metrics reporter and starts 128 // the reporter returning it so it may be stopped if successfully started. 129 func StartReportingExtendedMetrics( 130 scope tally.Scope, 131 reportInterval time.Duration, 132 metricsType ExtendedMetricsType, 133 ) (Reporter, error) { 134 reporter := NewExtendedMetricsReporter(scope, reportInterval, metricsType) 135 if err := reporter.Start(); err != nil { 136 return nil, err 137 } 138 return reporter, nil 139 } 140 141 type runtimeMetrics struct { 142 NumGoRoutines tally.Gauge 143 GoMaxProcs tally.Gauge 144 MemoryAllocated tally.Gauge 145 MemoryHeap tally.Gauge 146 MemoryHeapIdle tally.Gauge 147 MemoryHeapInuse tally.Gauge 148 MemoryStack tally.Gauge 149 GCCPUFraction tally.Gauge 150 NumGC tally.Counter 151 GcPauseMs tally.Timer 152 lastNumGC uint32 153 } 154 155 func (r *runtimeMetrics) report(metricsType ExtendedMetricsType) { 156 if metricsType == NoExtendedMetrics { 157 return 158 } 159 160 r.NumGoRoutines.Update(float64(runtime.NumGoroutine())) 161 r.GoMaxProcs.Update(float64(runtime.GOMAXPROCS(0))) 162 if metricsType < DetailedExtendedMetrics { 163 return 164 } 165 166 var memStats runtime.MemStats 167 runtime.ReadMemStats(&memStats) 168 r.MemoryAllocated.Update(float64(memStats.Alloc)) 169 r.MemoryHeap.Update(float64(memStats.HeapAlloc)) 170 r.MemoryHeapIdle.Update(float64(memStats.HeapIdle)) 171 r.MemoryHeapInuse.Update(float64(memStats.HeapInuse)) 172 r.MemoryStack.Update(float64(memStats.StackInuse)) 173 r.GCCPUFraction.Update(memStats.GCCPUFraction) 174 175 // memStats.NumGC is a perpetually incrementing counter (unless it wraps at 2^32). 176 num := memStats.NumGC 177 lastNum := atomic.SwapUint32(&r.lastNumGC, num) 178 if delta := num - lastNum; delta > 0 { 179 r.NumGC.Inc(int64(delta)) 180 if delta > 255 { 181 // too many GCs happened, the timestamps buffer got wrapped around. Report only the last 256. 182 lastNum = num - 256 183 } 184 for i := lastNum; i != num; i++ { 185 pause := memStats.PauseNs[i%256] 186 r.GcPauseMs.Record(time.Duration(pause)) 187 } 188 } 189 } 190 191 type extendedMetricsReporter struct { 192 baseReporter 193 processReporter Reporter 194 195 metricsType ExtendedMetricsType 196 runtime runtimeMetrics 197 } 198 199 // NewExtendedMetricsReporter creates a new extended metrics reporter 200 // that reports runtime and process metrics. 201 func NewExtendedMetricsReporter( 202 scope tally.Scope, 203 reportInterval time.Duration, 204 metricsType ExtendedMetricsType, 205 ) Reporter { 206 var ( 207 r = new(extendedMetricsReporter) 208 enableProcessReporter bool 209 ) 210 211 r.metricsType = metricsType 212 r.init(reportInterval, func() { 213 r.runtime.report(r.metricsType) 214 }) 215 216 if r.metricsType == NoExtendedMetrics { 217 return r 218 } 219 220 switch r.metricsType { 221 case ModerateExtendedMetrics: 222 enableProcessReporter = true 223 case DetailedExtendedMetrics: 224 enableProcessReporter = true 225 default: 226 enableProcessReporter = false 227 } 228 229 if enableProcessReporter { 230 // ProcessReporter can be quite slow in some situations (specifically 231 // counting FDs for processes that have many of them) so it runs on 232 // its own report loop. 233 r.processReporter = NewProcessReporter(scope, reportInterval) 234 } 235 236 runtimeScope := scope.SubScope("runtime") 237 r.runtime.NumGoRoutines = runtimeScope.Gauge("num-goroutines") 238 r.runtime.GoMaxProcs = runtimeScope.Gauge("gomaxprocs") 239 if r.metricsType < DetailedExtendedMetrics { 240 return r 241 } 242 243 var memstats runtime.MemStats 244 runtime.ReadMemStats(&memstats) 245 memoryScope := runtimeScope.SubScope("memory") 246 r.runtime.MemoryAllocated = memoryScope.Gauge("allocated") 247 r.runtime.MemoryHeap = memoryScope.Gauge("heap") 248 r.runtime.MemoryHeapIdle = memoryScope.Gauge("heapidle") 249 r.runtime.MemoryHeapInuse = memoryScope.Gauge("heapinuse") 250 r.runtime.MemoryStack = memoryScope.Gauge("stack") 251 r.runtime.GCCPUFraction = memoryScope.Gauge("gc-cpu-fraction") 252 r.runtime.NumGC = memoryScope.Counter("num-gc") 253 r.runtime.GcPauseMs = memoryScope.Timer("gc-pause-ms") 254 r.runtime.lastNumGC = memstats.NumGC 255 256 return r 257 } 258 259 func (e *extendedMetricsReporter) Start() error { 260 if err := e.baseReporter.Start(); err != nil { 261 return err 262 } 263 264 if e.processReporter != nil { 265 if err := e.processReporter.Start(); err != nil { 266 return err 267 } 268 } 269 270 return nil 271 } 272 273 func (e *extendedMetricsReporter) Stop() error { 274 multiErr := xerrors.NewMultiError() 275 276 if err := e.baseReporter.Stop(); err != nil { 277 multiErr = multiErr.Add(err) 278 } 279 280 if e.processReporter != nil { 281 if err := e.processReporter.Stop(); err != nil { 282 multiErr = multiErr.Add(err) 283 } 284 } 285 286 return multiErr.FinalError() 287 }