github.com/waldiirawan/apm-agent-go/v2@v2.2.2/breakdown.go (about) 1 // Licensed to Elasticsearch B.V. under one or more contributor 2 // license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright 4 // ownership. Elasticsearch B.V. licenses this file to you under 5 // the Apache License, Version 2.0 (the "License"); you may 6 // not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, 12 // software distributed under the License is distributed on an 13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 // KIND, either express or implied. See the License for the 15 // specific language governing permissions and limitations 16 // under the License. 17 18 package apm // import "github.com/waldiirawan/apm-agent-go/v2" 19 20 import ( 21 "fmt" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "github.com/waldiirawan/apm-agent-go/v2/model" 27 ) 28 29 const ( 30 // breakdownMetricsLimit is the maximum number of breakdown metric 31 // buckets to accumulate per reporting period. Metrics are broken 32 // down by {transactionType, transactionName, spanType, spanSubtype} 33 // tuples. 34 breakdownMetricsLimit = 1000 35 36 // appSpanType is the special span type associated with transactions, 37 // for reporting transaction self-time. 38 appSpanType = "app" 39 40 // Breakdown metric names. 41 spanSelfTimeCountMetricName = "span.self_time.count" 42 spanSelfTimeSumMetricName = "span.self_time.sum.us" 43 ) 44 45 var ( 46 breakdownMetricsLimitWarning = fmt.Sprintf(` 47 The limit of %d breakdown metricsets has been reached, no new metricsets will be created. 48 Try to name your transactions so that there are less distinct transaction names.`[1:], 49 breakdownMetricsLimit, 50 ) 51 ) 52 53 // spanTimingsKey identifies a span type and subtype, for use as the key in 54 // spanTimingsMap. 55 type spanTimingsKey struct { 56 spanType string 57 spanSubtype string 58 } 59 60 // spanTiming records the number of times a {spanType, spanSubtype} pair 61 // has occurred (within the context of a transaction group), along with 62 // the sum of the span durations. 63 type spanTiming struct { 64 duration int64 65 count uint64 66 } 67 68 // spanTimingsMap records span timings for a transaction group. 69 type spanTimingsMap map[spanTimingsKey]spanTiming 70 71 // add accumulates the timing for a {spanType, spanSubtype} pair. 72 func (m spanTimingsMap) add(spanType, spanSubtype string, d time.Duration) { 73 k := spanTimingsKey{spanType: spanType, spanSubtype: spanSubtype} 74 timing := m[k] 75 timing.count++ 76 timing.duration += int64(d) 77 m[k] = timing 78 } 79 80 // reset resets m back to its initial zero state. 81 func (m spanTimingsMap) reset() { 82 for k := range m { 83 delete(m, k) 84 } 85 } 86 87 // breakdownMetrics holds a pair of breakdown metrics maps. The "active" map 88 // accumulates new breakdown metrics, and is swapped with the "inactive" map 89 // just prior to when metrics gathering begins. When metrics gathering 90 // completes, the inactive map will be empty. 91 // 92 // breakdownMetrics may be written to concurrently by the tracer, and any 93 // number of other goroutines when a transaction cannot be enqueued. 94 type breakdownMetrics struct { 95 enabled bool 96 97 mu sync.RWMutex 98 active, inactive *breakdownMetricsMap 99 } 100 101 func newBreakdownMetrics() *breakdownMetrics { 102 return &breakdownMetrics{ 103 active: newBreakdownMetricsMap(), 104 inactive: newBreakdownMetricsMap(), 105 } 106 } 107 108 type breakdownMetricsMap struct { 109 mu sync.RWMutex 110 m map[uint64][]*breakdownMetricsMapEntry 111 space []breakdownMetricsMapEntry 112 entries int 113 } 114 115 func newBreakdownMetricsMap() *breakdownMetricsMap { 116 return &breakdownMetricsMap{ 117 m: make(map[uint64][]*breakdownMetricsMapEntry), 118 space: make([]breakdownMetricsMapEntry, breakdownMetricsLimit), 119 } 120 } 121 122 type breakdownMetricsMapEntry struct { 123 breakdownMetricsKey 124 breakdownTiming 125 } 126 127 // breakdownMetricsKey identifies a transaction group, and optionally a 128 // spanTimingsKey, for recording transaction and span breakdown metrics. 129 type breakdownMetricsKey struct { 130 transactionType string 131 transactionName string 132 spanTimingsKey 133 } 134 135 func (k breakdownMetricsKey) hash() uint64 { 136 h := newFnv1a() 137 h.add(k.transactionType) 138 h.add(k.transactionName) 139 if k.spanType != "" { 140 h.add(k.spanType) 141 } 142 if k.spanSubtype != "" { 143 h.add(k.spanSubtype) 144 } 145 return uint64(h) 146 } 147 148 // breakdownTiming holds breakdown metrics. 149 type breakdownTiming struct { 150 // span holds the "span.self_time" metric values. 151 span spanTiming 152 } 153 154 func (lhs *breakdownTiming) accumulate(rhs breakdownTiming) { 155 atomic.AddUint64(&lhs.span.count, rhs.span.count) 156 atomic.AddInt64(&lhs.span.duration, rhs.span.duration) 157 } 158 159 // recordTransaction records breakdown metrics for td into m. 160 // 161 // recordTransaction returns true if breakdown metrics were 162 // completely recorded, and false if any metrics were not 163 // recorded due to the limit being reached. 164 func (m *breakdownMetrics) recordTransaction(td *TransactionData) bool { 165 m.mu.RLock() 166 defer m.mu.RUnlock() 167 168 k := breakdownMetricsKey{ 169 transactionType: td.Type, 170 transactionName: td.Name, 171 spanTimingsKey: spanTimingsKey{ 172 spanType: appSpanType, 173 }, 174 } 175 176 var transactionSpanTiming spanTiming 177 if td.breakdownMetricsEnabled { 178 endTime := td.timestamp.Add(td.Duration) 179 transactionSelfTime := td.Duration - td.childrenTimer.finalDuration(endTime) 180 transactionSpanTiming = spanTiming{count: 1, duration: int64(transactionSelfTime)} 181 } 182 183 if !m.active.record(k, breakdownTiming{ 184 span: transactionSpanTiming, 185 }) { 186 // We couldn't record the transaction's metricset, so we won't 187 // be able to record spans for that transaction either. 188 return false 189 } 190 191 ok := true 192 for sk, timing := range td.spanTimings { 193 k.spanTimingsKey = sk 194 ok = ok && m.active.record(k, breakdownTiming{span: timing}) 195 } 196 return ok 197 } 198 199 // record records a single breakdown metric, identified by k. 200 func (m *breakdownMetricsMap) record(k breakdownMetricsKey, bt breakdownTiming) bool { 201 hash := k.hash() 202 m.mu.RLock() 203 entries, ok := m.m[hash] 204 m.mu.RUnlock() 205 var offset int 206 if ok { 207 for offset = range entries { 208 if entries[offset].breakdownMetricsKey == k { 209 // The append may reallocate the entries, but the 210 // entries are pointers into m.activeSpace. Therefore, 211 // entries' timings can safely be atomically incremented 212 // without holding the read lock. 213 entries[offset].breakdownTiming.accumulate(bt) 214 return true 215 } 216 } 217 offset++ // where to start searching with the write lock below 218 } 219 220 m.mu.Lock() 221 entries, ok = m.m[hash] 222 if ok { 223 for i := range entries[offset:] { 224 if entries[offset+i].breakdownMetricsKey == k { 225 m.mu.Unlock() 226 entries[offset+i].breakdownTiming.accumulate(bt) 227 return true 228 } 229 } 230 } else if m.entries >= breakdownMetricsLimit { 231 m.mu.Unlock() 232 return false 233 } 234 entry := &m.space[m.entries] 235 *entry = breakdownMetricsMapEntry{ 236 breakdownTiming: bt, 237 breakdownMetricsKey: k, 238 } 239 m.m[hash] = append(entries, entry) 240 m.entries++ 241 m.mu.Unlock() 242 return true 243 } 244 245 // gather is called by builtinMetricsGatherer to gather breakdown metrics. 246 func (m *breakdownMetrics) gather(out *Metrics) { 247 // Hold m.mu only long enough to swap m.active and m.inactive. 248 // This will be blocked by metric updates, but that's OK; only 249 // metrics gathering will be delayed. After swapping we do not 250 // need to hold m.mu, since nothing concurrently accesses 251 // m.inactive while the gatherer is iterating over it. 252 m.mu.Lock() 253 m.active, m.inactive = m.inactive, m.active 254 m.mu.Unlock() 255 256 for hash, entries := range m.inactive.m { 257 for _, entry := range entries { 258 if entry.span.count > 0 { 259 out.transactionGroupMetrics = append(out.transactionGroupMetrics, &model.Metrics{ 260 Transaction: model.MetricsTransaction{ 261 Type: entry.transactionType, 262 Name: entry.transactionName, 263 }, 264 Span: model.MetricsSpan{ 265 Type: entry.spanType, 266 Subtype: entry.spanSubtype, 267 }, 268 Samples: map[string]model.Metric{ 269 spanSelfTimeCountMetricName: { 270 Value: float64(entry.span.count), 271 }, 272 spanSelfTimeSumMetricName: { 273 Value: durationMicros(time.Duration(entry.span.duration)), 274 }, 275 }, 276 }) 277 } 278 entry.breakdownMetricsKey = breakdownMetricsKey{} // release strings 279 } 280 delete(m.inactive.m, hash) 281 } 282 m.inactive.entries = 0 283 } 284 285 // childrenTimer tracks time spent by children of a transaction or span. 286 // 287 // childrenTimer is not goroutine-safe. 288 type childrenTimer struct { 289 // active holds the number active children. 290 active int 291 292 // start holds the timestamp at which active went from zero to one. 293 start time.Time 294 295 // totalDuration holds the total duration of time periods in which 296 // at least one child was active. 297 totalDuration time.Duration 298 } 299 300 func (t *childrenTimer) childStarted(start time.Time) { 301 t.active++ 302 if t.active == 1 { 303 t.start = start 304 } 305 } 306 307 func (t *childrenTimer) childEnded(end time.Time) { 308 t.active-- 309 if t.active == 0 { 310 t.totalDuration += end.Sub(t.start) 311 } 312 } 313 314 func (t *childrenTimer) finalDuration(end time.Time) time.Duration { 315 if t.active > 0 { 316 t.active = 0 317 t.totalDuration += end.Sub(t.start) 318 } 319 return t.totalDuration 320 }