github.com/waldiirawan/apm-agent-go/v2@v2.2.2/breakdown.go (about)

     1  // Licensed to Elasticsearch B.V. under one or more contributor
     2  // license agreements. See the NOTICE file distributed with
     3  // this work for additional information regarding copyright
     4  // ownership. Elasticsearch B.V. licenses this file to you under
     5  // the Apache License, Version 2.0 (the "License"); you may
     6  // not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing,
    12  // software distributed under the License is distributed on an
    13  // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    14  // KIND, either express or implied.  See the License for the
    15  // specific language governing permissions and limitations
    16  // under the License.
    17  
    18  package apm // import "github.com/waldiirawan/apm-agent-go/v2"
    19  
    20  import (
    21  	"fmt"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"github.com/waldiirawan/apm-agent-go/v2/model"
    27  )
    28  
    29  const (
    30  	// breakdownMetricsLimit is the maximum number of breakdown metric
    31  	// buckets to accumulate per reporting period. Metrics are broken
    32  	// down by {transactionType, transactionName, spanType, spanSubtype}
    33  	// tuples.
    34  	breakdownMetricsLimit = 1000
    35  
    36  	// appSpanType is the special span type associated with transactions,
    37  	// for reporting transaction self-time.
    38  	appSpanType = "app"
    39  
    40  	// Breakdown metric names.
    41  	spanSelfTimeCountMetricName = "span.self_time.count"
    42  	spanSelfTimeSumMetricName   = "span.self_time.sum.us"
    43  )
    44  
    45  var (
    46  	breakdownMetricsLimitWarning = fmt.Sprintf(`
    47  The limit of %d breakdown metricsets has been reached, no new metricsets will be created.
    48  Try to name your transactions so that there are less distinct transaction names.`[1:],
    49  		breakdownMetricsLimit,
    50  	)
    51  )
    52  
    53  // spanTimingsKey identifies a span type and subtype, for use as the key in
    54  // spanTimingsMap.
    55  type spanTimingsKey struct {
    56  	spanType    string
    57  	spanSubtype string
    58  }
    59  
    60  // spanTiming records the number of times a {spanType, spanSubtype} pair
    61  // has occurred (within the context of a transaction group), along with
    62  // the sum of the span durations.
    63  type spanTiming struct {
    64  	duration int64
    65  	count    uint64
    66  }
    67  
    68  // spanTimingsMap records span timings for a transaction group.
    69  type spanTimingsMap map[spanTimingsKey]spanTiming
    70  
    71  // add accumulates the timing for a {spanType, spanSubtype} pair.
    72  func (m spanTimingsMap) add(spanType, spanSubtype string, d time.Duration) {
    73  	k := spanTimingsKey{spanType: spanType, spanSubtype: spanSubtype}
    74  	timing := m[k]
    75  	timing.count++
    76  	timing.duration += int64(d)
    77  	m[k] = timing
    78  }
    79  
    80  // reset resets m back to its initial zero state.
    81  func (m spanTimingsMap) reset() {
    82  	for k := range m {
    83  		delete(m, k)
    84  	}
    85  }
    86  
    87  // breakdownMetrics holds a pair of breakdown metrics maps. The "active" map
    88  // accumulates new breakdown metrics, and is swapped with the "inactive" map
    89  // just prior to when metrics gathering begins. When metrics gathering
    90  // completes, the inactive map will be empty.
    91  //
    92  // breakdownMetrics may be written to concurrently by the tracer, and any
    93  // number of other goroutines when a transaction cannot be enqueued.
    94  type breakdownMetrics struct {
    95  	enabled bool
    96  
    97  	mu               sync.RWMutex
    98  	active, inactive *breakdownMetricsMap
    99  }
   100  
   101  func newBreakdownMetrics() *breakdownMetrics {
   102  	return &breakdownMetrics{
   103  		active:   newBreakdownMetricsMap(),
   104  		inactive: newBreakdownMetricsMap(),
   105  	}
   106  }
   107  
   108  type breakdownMetricsMap struct {
   109  	mu      sync.RWMutex
   110  	m       map[uint64][]*breakdownMetricsMapEntry
   111  	space   []breakdownMetricsMapEntry
   112  	entries int
   113  }
   114  
   115  func newBreakdownMetricsMap() *breakdownMetricsMap {
   116  	return &breakdownMetricsMap{
   117  		m:     make(map[uint64][]*breakdownMetricsMapEntry),
   118  		space: make([]breakdownMetricsMapEntry, breakdownMetricsLimit),
   119  	}
   120  }
   121  
   122  type breakdownMetricsMapEntry struct {
   123  	breakdownMetricsKey
   124  	breakdownTiming
   125  }
   126  
   127  // breakdownMetricsKey identifies a transaction group, and optionally a
   128  // spanTimingsKey, for recording transaction and span breakdown metrics.
   129  type breakdownMetricsKey struct {
   130  	transactionType string
   131  	transactionName string
   132  	spanTimingsKey
   133  }
   134  
   135  func (k breakdownMetricsKey) hash() uint64 {
   136  	h := newFnv1a()
   137  	h.add(k.transactionType)
   138  	h.add(k.transactionName)
   139  	if k.spanType != "" {
   140  		h.add(k.spanType)
   141  	}
   142  	if k.spanSubtype != "" {
   143  		h.add(k.spanSubtype)
   144  	}
   145  	return uint64(h)
   146  }
   147  
   148  // breakdownTiming holds breakdown metrics.
   149  type breakdownTiming struct {
   150  	// span holds the "span.self_time" metric values.
   151  	span spanTiming
   152  }
   153  
   154  func (lhs *breakdownTiming) accumulate(rhs breakdownTiming) {
   155  	atomic.AddUint64(&lhs.span.count, rhs.span.count)
   156  	atomic.AddInt64(&lhs.span.duration, rhs.span.duration)
   157  }
   158  
   159  // recordTransaction records breakdown metrics for td into m.
   160  //
   161  // recordTransaction returns true if breakdown metrics were
   162  // completely recorded, and false if any metrics were not
   163  // recorded due to the limit being reached.
   164  func (m *breakdownMetrics) recordTransaction(td *TransactionData) bool {
   165  	m.mu.RLock()
   166  	defer m.mu.RUnlock()
   167  
   168  	k := breakdownMetricsKey{
   169  		transactionType: td.Type,
   170  		transactionName: td.Name,
   171  		spanTimingsKey: spanTimingsKey{
   172  			spanType: appSpanType,
   173  		},
   174  	}
   175  
   176  	var transactionSpanTiming spanTiming
   177  	if td.breakdownMetricsEnabled {
   178  		endTime := td.timestamp.Add(td.Duration)
   179  		transactionSelfTime := td.Duration - td.childrenTimer.finalDuration(endTime)
   180  		transactionSpanTiming = spanTiming{count: 1, duration: int64(transactionSelfTime)}
   181  	}
   182  
   183  	if !m.active.record(k, breakdownTiming{
   184  		span: transactionSpanTiming,
   185  	}) {
   186  		// We couldn't record the transaction's metricset, so we won't
   187  		// be able to record spans for that transaction either.
   188  		return false
   189  	}
   190  
   191  	ok := true
   192  	for sk, timing := range td.spanTimings {
   193  		k.spanTimingsKey = sk
   194  		ok = ok && m.active.record(k, breakdownTiming{span: timing})
   195  	}
   196  	return ok
   197  }
   198  
   199  // record records a single breakdown metric, identified by k.
   200  func (m *breakdownMetricsMap) record(k breakdownMetricsKey, bt breakdownTiming) bool {
   201  	hash := k.hash()
   202  	m.mu.RLock()
   203  	entries, ok := m.m[hash]
   204  	m.mu.RUnlock()
   205  	var offset int
   206  	if ok {
   207  		for offset = range entries {
   208  			if entries[offset].breakdownMetricsKey == k {
   209  				// The append may reallocate the entries, but the
   210  				// entries are pointers into m.activeSpace. Therefore,
   211  				// entries' timings can safely be atomically incremented
   212  				// without holding the read lock.
   213  				entries[offset].breakdownTiming.accumulate(bt)
   214  				return true
   215  			}
   216  		}
   217  		offset++ // where to start searching with the write lock below
   218  	}
   219  
   220  	m.mu.Lock()
   221  	entries, ok = m.m[hash]
   222  	if ok {
   223  		for i := range entries[offset:] {
   224  			if entries[offset+i].breakdownMetricsKey == k {
   225  				m.mu.Unlock()
   226  				entries[offset+i].breakdownTiming.accumulate(bt)
   227  				return true
   228  			}
   229  		}
   230  	} else if m.entries >= breakdownMetricsLimit {
   231  		m.mu.Unlock()
   232  		return false
   233  	}
   234  	entry := &m.space[m.entries]
   235  	*entry = breakdownMetricsMapEntry{
   236  		breakdownTiming:     bt,
   237  		breakdownMetricsKey: k,
   238  	}
   239  	m.m[hash] = append(entries, entry)
   240  	m.entries++
   241  	m.mu.Unlock()
   242  	return true
   243  }
   244  
   245  // gather is called by builtinMetricsGatherer to gather breakdown metrics.
   246  func (m *breakdownMetrics) gather(out *Metrics) {
   247  	// Hold m.mu only long enough to swap m.active and m.inactive.
   248  	// This will be blocked by metric updates, but that's OK; only
   249  	// metrics gathering will be delayed. After swapping we do not
   250  	// need to hold m.mu, since nothing concurrently accesses
   251  	// m.inactive while the gatherer is iterating over it.
   252  	m.mu.Lock()
   253  	m.active, m.inactive = m.inactive, m.active
   254  	m.mu.Unlock()
   255  
   256  	for hash, entries := range m.inactive.m {
   257  		for _, entry := range entries {
   258  			if entry.span.count > 0 {
   259  				out.transactionGroupMetrics = append(out.transactionGroupMetrics, &model.Metrics{
   260  					Transaction: model.MetricsTransaction{
   261  						Type: entry.transactionType,
   262  						Name: entry.transactionName,
   263  					},
   264  					Span: model.MetricsSpan{
   265  						Type:    entry.spanType,
   266  						Subtype: entry.spanSubtype,
   267  					},
   268  					Samples: map[string]model.Metric{
   269  						spanSelfTimeCountMetricName: {
   270  							Value: float64(entry.span.count),
   271  						},
   272  						spanSelfTimeSumMetricName: {
   273  							Value: durationMicros(time.Duration(entry.span.duration)),
   274  						},
   275  					},
   276  				})
   277  			}
   278  			entry.breakdownMetricsKey = breakdownMetricsKey{} // release strings
   279  		}
   280  		delete(m.inactive.m, hash)
   281  	}
   282  	m.inactive.entries = 0
   283  }
   284  
   285  // childrenTimer tracks time spent by children of a transaction or span.
   286  //
   287  // childrenTimer is not goroutine-safe.
   288  type childrenTimer struct {
   289  	// active holds the number active children.
   290  	active int
   291  
   292  	// start holds the timestamp at which active went from zero to one.
   293  	start time.Time
   294  
   295  	// totalDuration holds the total duration of time periods in which
   296  	// at least one child was active.
   297  	totalDuration time.Duration
   298  }
   299  
   300  func (t *childrenTimer) childStarted(start time.Time) {
   301  	t.active++
   302  	if t.active == 1 {
   303  		t.start = start
   304  	}
   305  }
   306  
   307  func (t *childrenTimer) childEnded(end time.Time) {
   308  	t.active--
   309  	if t.active == 0 {
   310  		t.totalDuration += end.Sub(t.start)
   311  	}
   312  }
   313  
   314  func (t *childrenTimer) finalDuration(end time.Time) time.Duration {
   315  	if t.active > 0 {
   316  		t.active = 0
   317  		t.totalDuration += end.Sub(t.start)
   318  	}
   319  	return t.totalDuration
   320  }