github.com/waldiirawan/apm-agent-go/v2@v2.2.2/transaction.go (about)

     1  // Licensed to Elasticsearch B.V. under one or more contributor
     2  // license agreements. See the NOTICE file distributed with
     3  // this work for additional information regarding copyright
     4  // ownership. Elasticsearch B.V. licenses this file to you under
     5  // the Apache License, Version 2.0 (the "License"); you may
     6  // not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing,
    12  // software distributed under the License is distributed on an
    13  // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    14  // KIND, either express or implied.  See the License for the
    15  // specific language governing permissions and limitations
    16  // under the License.
    17  
    18  package apm // import "github.com/waldiirawan/apm-agent-go/v2"
    19  
    20  import (
    21  	cryptorand "crypto/rand"
    22  	"encoding/binary"
    23  	"math/rand"
    24  	"sync"
    25  	"time"
    26  )
    27  
    28  const (
    29  	// maxDroppedSpanStats sets the hard limit for the number of dropped span
    30  	// stats that are stored in a transaction.
    31  	maxDroppedSpanStats = 128
    32  )
    33  
    34  // StartTransaction returns a new Transaction with the specified
    35  // name and type, and with the start time set to the current time.
    36  // This is equivalent to calling StartTransactionOptions with a
    37  // zero TransactionOptions.
    38  func (t *Tracer) StartTransaction(name, transactionType string) *Transaction {
    39  	return t.StartTransactionOptions(name, transactionType, TransactionOptions{})
    40  }
    41  
// StartTransactionOptions returns a new Transaction with the
// specified name, type, and options.
//
// The transaction's TransactionData is taken from the tracer's pool when one
// is available, and freshly allocated otherwise.
//
// If the tracer's current instrumentation config is not recording, or the
// tracer is not active, the transaction is returned immediately: no name,
// type, IDs, timestamp or links are assigned to it.
//
// Otherwise the transaction either joins the trace described by
// opts.TraceContext or starts a new trace (becoming a root transaction),
// depending on the validity of opts.TraceContext and on the configured
// continuation strategy. Only root transactions consult the sampler; a
// transaction that joins an existing trace inherits the incoming trace
// options.
func (t *Tracer) StartTransactionOptions(name, transactionType string, opts TransactionOptions) *Transaction {
	// Reuse pooled transaction data where possible; the type assertion
	// yields nil when the pool has nothing of the expected type to offer.
	td, _ := t.transactionDataPool.Get().(*TransactionData)
	if td == nil {
		td = &TransactionData{
			Duration: -1,
			Context: Context{
				captureBodyMask: CaptureBodyTransactions,
			},
			spanTimings:       make(spanTimingsMap),
			droppedSpansStats: make(droppedSpanTimingsMap, maxDroppedSpanStats),
		}
		// Seed the ID generator from crypto/rand, falling back to the
		// current time if the system source cannot be read.
		var seed int64
		if err := binary.Read(cryptorand.Reader, binary.LittleEndian, &seed); err != nil {
			seed = time.Now().UnixNano()
		}
		td.rand = rand.New(rand.NewSource(seed))
	}
	tx := &Transaction{tracer: t, TransactionData: td}

	// Take a snapshot of config that should apply to all spans within the
	// transaction.
	instrumentationConfig := t.instrumentationConfig()
	tx.recording = instrumentationConfig.recording
	if !tx.recording || !t.Active() {
		// Nothing else is initialised for a transaction that will not
		// be recorded.
		return tx
	}

	tx.maxSpans = instrumentationConfig.maxSpans
	tx.compressedSpan.options = instrumentationConfig.compressionOptions
	tx.exitSpanMinDuration = instrumentationConfig.exitSpanMinDuration
	tx.spanStackTraceMinDuration = instrumentationConfig.spanStackTraceMinDuration
	tx.stackTraceLimit = instrumentationConfig.stackTraceLimit
	tx.Context.captureHeaders = instrumentationConfig.captureHeaders
	tx.propagateLegacyHeader = instrumentationConfig.propagateLegacyHeader
	tx.Context.sanitizedFieldNames = instrumentationConfig.sanitizedFieldNames
	tx.breakdownMetricsEnabled = t.breakdownMetrics.enabled

	// Resolve "restart_external" to one of the two concrete strategies,
	// based on the incoming trace state's haveElastic flag (presumably set
	// when the trace state carries an Elastic entry -- confirm against the
	// TraceState implementation).
	continuationStrategy := instrumentationConfig.continuationStrategy
	shouldRestartTrace := false
	if continuationStrategy == "restart_external" {
		if opts.TraceContext.State.haveElastic {
			continuationStrategy = "continue"
		} else {
			continuationStrategy = "restart"
		}
	}

	// When restarting, the incoming trace context is not continued;
	// instead it is remembered as a span link, provided that both its
	// trace ID and span ID are non-zero.
	if continuationStrategy == "restart" {
		if !opts.TraceContext.Trace.isZero() && !opts.TraceContext.Span.isZero() {
			link := SpanLink{
				Trace: opts.TraceContext.Trace,
				Span:  opts.TraceContext.Span,
			}
			tx.links = append(tx.links, link)
			shouldRestartTrace = true
		}
	}

	var root bool
	if opts.TraceContext.Trace.Validate() == nil && !shouldRestartTrace {
		// Join the incoming trace: adopt its trace ID and options, and
		// use the incoming span ID (if valid) as this transaction's
		// parent.
		tx.traceContext.Trace = opts.TraceContext.Trace
		tx.traceContext.Options = opts.TraceContext.Options
		if opts.TraceContext.Span.Validate() == nil {
			tx.parentID = opts.TraceContext.Span
		}
		if opts.TransactionID.Validate() == nil {
			tx.traceContext.Span = opts.TransactionID
		} else {
			// No usable transaction ID was supplied: generate one.
			binary.LittleEndian.PutUint64(tx.traceContext.Span[:], tx.rand.Uint64())
		}
		if opts.TraceContext.State.Validate() == nil {
			tx.traceContext.State = opts.TraceContext.State
		}
	} else {
		// Start a new trace. We reuse the trace ID for the root transaction's ID
		// if one is not specified in the options.
		root = true
		binary.LittleEndian.PutUint64(tx.traceContext.Trace[:8], tx.rand.Uint64())
		binary.LittleEndian.PutUint64(tx.traceContext.Trace[8:], tx.rand.Uint64())
		if opts.TransactionID.Validate() == nil {
			tx.traceContext.Span = opts.TransactionID
		} else {
			// copy stops once the (shorter) span ID is full, so only
			// the leading bytes of the trace ID are reused.
			copy(tx.traceContext.Span[:], tx.traceContext.Trace[:])
		}
	}

	if root {
		// Root transactions make the sampling decision. With no sampler
		// configured, every root transaction is sampled and no trace
		// state is generated.
		var result SampleResult
		if instrumentationConfig.sampler != nil {
			result = instrumentationConfig.sampler.Sample(SampleParams{
				TraceContext: tx.traceContext,
			})
			if !result.Sampled {
				// Special case: for unsampled transactions we
				// report a sample rate of 0, so that we do not
				// count them in aggregations in the server.
				// This is necessary to avoid overcounting, as
				// we will scale the sampled transactions.
				result.SampleRate = 0
			}
			// Record the rounded sample rate in the trace state under
			// the Elastic vendor key.
			sampleRate := roundSampleRate(result.SampleRate)
			tx.traceContext.State = NewTraceState(TraceStateEntry{
				Key:   elasticTracestateVendorKey,
				Value: formatElasticTracestateValue(sampleRate),
			})
		} else {
			result.Sampled = true
		}
		if result.Sampled {
			o := tx.traceContext.Options.WithRecorded(true)
			tx.traceContext.Options = o
		}
	} else {
		// TODO(axw) make this behaviour configurable. In some cases
		// it may not be a good idea to honour the recorded flag, as
		// it may open up the application to DoS by forced sampling.
		// Even ignoring bad actors, a service that has many feeder
		// applications may end up being sampled at a very high rate.
		tx.traceContext.Options = opts.TraceContext.Options
	}

	tx.Name = name
	tx.Type = transactionType
	tx.timestamp = opts.Start
	if tx.timestamp.IsZero() {
		tx.timestamp = time.Now()
	}
	// Caller-supplied links follow any link added by the restart logic.
	tx.links = append(tx.links, opts.Links...)
	return tx
}
   174  
// TransactionOptions holds options for Tracer.StartTransactionOptions.
//
// The zero value is valid: it starts a new trace, generates the transaction
// ID, and uses the current time as the start time.
type TransactionOptions struct {
	// TraceContext holds the TraceContext for a new transaction. If this is
	// zero, a new trace will be started.
	//
	// A new trace is also started when the trace ID fails validation, or
	// when the tracer's continuation strategy resolves to "restart" and
	// both the trace ID and span ID are non-zero; in the latter case the
	// supplied trace and span IDs are recorded as a span link on the new
	// transaction.
	TraceContext TraceContext

	// TransactionID holds the ID to assign to the transaction. If this is
	// zero, a new ID will be generated and used instead.
	TransactionID SpanID

	// Start is the start time of the transaction. If this has the
	// zero value, time.Now() will be used instead.
	Start time.Time

	// Links, if non-nil, holds a list of spans linked to the transaction.
	// They are appended to the transaction's links when it is started.
	Links []SpanLink
}
   192  
// Transaction describes an event occurring in the monitored service.
//
// The exported accessors Sampled, TraceContext, EnsureParent and ParentID
// accept a nil *Transaction and return zero values for it.
type Transaction struct {
	// tracer is the Tracer that started the transaction. traceContext
	// holds the trace ID, options and state, with its Span field holding
	// the transaction's own ID. parentID is the ID of the parent span; it
	// is zero until assigned from an incoming trace context or by
	// EnsureParent.
	tracer       *Tracer
	traceContext TraceContext
	parentID     SpanID

	// mu must be held when checking or clearing the TransactionData
	// pointer (see ended); EnsureParent and ParentID also hold it while
	// accessing parentID.
	mu sync.RWMutex

	// TransactionData holds the transaction data. This field is set to
	// nil when either of the transaction's End or Discard methods are called.
	*TransactionData
}
   205  
   206  // Sampled reports whether or not the transaction is sampled.
   207  func (tx *Transaction) Sampled() bool {
   208  	if tx == nil {
   209  		return false
   210  	}
   211  	return tx.traceContext.Options.Recorded()
   212  }
   213  
   214  // TraceContext returns the transaction's TraceContext.
   215  //
   216  // The resulting TraceContext's Span field holds the transaction's ID.
   217  // If tx is nil, a zero (invalid) TraceContext is returned.
   218  func (tx *Transaction) TraceContext() TraceContext {
   219  	if tx == nil {
   220  		return TraceContext{}
   221  	}
   222  	return tx.traceContext
   223  }
   224  
   225  // ShouldPropagateLegacyHeader reports whether instrumentation should
   226  // propagate the legacy "Elastic-Apm-Traceparent" header in addition to
   227  // the standard W3C "traceparent" header.
   228  //
   229  // This method will be removed in a future major version when we remove
   230  // support for propagating the legacy header.
   231  func (tx *Transaction) ShouldPropagateLegacyHeader() bool {
   232  	tx.mu.Lock()
   233  	defer tx.mu.Unlock()
   234  	if tx.ended() {
   235  		return false
   236  	}
   237  	return tx.propagateLegacyHeader
   238  }
   239  
   240  // EnsureParent returns the span ID for for tx's parent, generating a
   241  // parent span ID if one has not already been set and tx has not been
   242  // ended. If tx is nil or has been ended, a zero (invalid) SpanID is
   243  // returned.
   244  //
   245  // This method can be used for generating a span ID for the RUM
   246  // (Real User Monitoring) agent, where the RUM agent is initialized
   247  // after the backend service returns.
   248  func (tx *Transaction) EnsureParent() SpanID {
   249  	if tx == nil {
   250  		return SpanID{}
   251  	}
   252  
   253  	tx.mu.Lock()
   254  	defer tx.mu.Unlock()
   255  	if tx.ended() {
   256  		return SpanID{}
   257  	}
   258  
   259  	if tx.parentID.isZero() {
   260  		// parentID can only be zero if tx is a root transaction
   261  		// for which GenerateParentTraceContext() has not previously
   262  		// been called. Reuse the latter half of the trace ID for
   263  		// the parent span ID; the first half is used for the
   264  		// transaction ID.
   265  		copy(tx.parentID[:], tx.traceContext.Trace[8:])
   266  	}
   267  	return tx.parentID
   268  }
   269  
   270  // ParentID returns the ID of the transaction's Parent or a zero (invalid) SpanID.
   271  func (tx *Transaction) ParentID() SpanID {
   272  	if tx == nil {
   273  		return SpanID{}
   274  	}
   275  	tx.mu.RLock()
   276  	defer tx.mu.RUnlock()
   277  	return tx.parentID
   278  }
   279  
   280  // Discard discards a previously started transaction.
   281  //
   282  // Calling Discard will set tx's TransactionData field to nil, so callers must
   283  // ensure tx is not updated after Discard returns.
   284  func (tx *Transaction) Discard() {
   285  	tx.mu.Lock()
   286  	defer tx.mu.Unlock()
   287  	if tx.ended() {
   288  		return
   289  	}
   290  	tx.reset(tx.tracer)
   291  	tx.TransactionData = nil
   292  }
   293  
   294  // End enqueues tx for sending to the Elastic APM server.
   295  //
   296  // Calling End will set tx's TransactionData field to nil, so callers
   297  // must ensure tx is not updated after End returns.
   298  //
   299  // If tx.Duration has not been set, End will set it to the elapsed time
   300  // since the transaction's start time.
   301  func (tx *Transaction) End() {
   302  	tx.mu.Lock()
   303  	defer tx.mu.Unlock()
   304  	if tx.ended() {
   305  		return
   306  	}
   307  	if tx.Type == "" {
   308  		tx.Type = "custom"
   309  	}
   310  	if tx.recording {
   311  		if tx.Duration < 0 {
   312  			tx.Duration = time.Since(tx.timestamp)
   313  		}
   314  		if tx.Outcome == "" {
   315  			tx.Outcome = tx.Context.outcome()
   316  			if tx.Outcome == "" {
   317  				if tx.errorCaptured {
   318  					tx.Outcome = "failure"
   319  				} else {
   320  					tx.Outcome = "success"
   321  				}
   322  			}
   323  		}
   324  		// Hold the transaction data lock to check if the transaction has any
   325  		// compressed spans in its cache, if so, evict cache and end the span.
   326  		tx.TransactionData.mu.Lock()
   327  		if evictedSpan := tx.compressedSpan.evict(); evictedSpan != nil {
   328  			evictedSpan.end()
   329  		}
   330  		tx.TransactionData.mu.Unlock()
   331  		tx.enqueue()
   332  	} else {
   333  		tx.reset(tx.tracer)
   334  	}
   335  	tx.TransactionData = nil
   336  }
   337  
   338  func (tx *Transaction) enqueue() {
   339  	event := tracerEvent{eventType: transactionEvent}
   340  	event.tx.Transaction = tx
   341  	event.tx.TransactionData = tx.TransactionData
   342  	select {
   343  	case tx.tracer.events <- event:
   344  	default:
   345  		// Enqueuing a transaction should never block.
   346  		tx.tracer.breakdownMetrics.recordTransaction(tx.TransactionData)
   347  
   348  		tx.tracer.stats.accumulate(TracerStats{TransactionsDropped: 1})
   349  		tx.reset(tx.tracer)
   350  	}
   351  }
   352  
   353  // ended reports whether or not End or Discard has been called.
   354  //
   355  // This must be called with tx.mu held.
   356  func (tx *Transaction) ended() bool {
   357  	return tx.TransactionData == nil
   358  }
   359  
// TransactionData holds the details for a transaction, and is embedded
// inside Transaction. When a transaction is ended, its TransactionData
// field will be set to nil.
//
// TransactionData values are recycled through the tracer's pool (see reset),
// so a value must not be retained after its transaction has been ended or
// discarded.
type TransactionData struct {
	// Name holds the transaction name, initialized with the value
	// passed to StartTransaction.
	Name string

	// Type holds the transaction type, initialized with the value
	// passed to StartTransaction. If Type is empty when Transaction.End
	// is called, it is set to "custom".
	Type string

	// Duration holds the transaction duration, initialized to -1.
	//
	// If you do not update Duration, calling Transaction.End will
	// calculate the duration based on the elapsed time since the
	// transaction's start time.
	Duration time.Duration

	// Context describes the context in which the transaction occurs.
	Context Context

	// Result holds the transaction result.
	Result string

	// Outcome holds the transaction outcome: success, failure, or
	// unknown (the default). If Outcome is set to something else,
	// it will be replaced with "unknown".
	//
	// Outcome is used for error rate calculations. A value of "success"
	// indicates that a transaction succeeded, while "failure" indicates
	// that the transaction failed. If Outcome is set to "unknown" (or
	// some other value), then the transaction will not be included in
	// error rate calculations.
	//
	// If Outcome is empty when Transaction.End is called on a recording
	// transaction, End fills it in: from Context if that yields an
	// outcome, otherwise "failure" if an error was captured for the
	// transaction and "success" if not.
	Outcome string

	// Snapshot of tracer configuration taken when the transaction was
	// started, together with the transaction's start time (timestamp).
	recording                 bool
	maxSpans                  int
	exitSpanMinDuration       time.Duration
	spanStackTraceMinDuration time.Duration
	stackTraceLimit           int
	breakdownMetricsEnabled   bool
	propagateLegacyHeader     bool
	timestamp                 time.Time

	// Span links and per-transaction span bookkeeping.
	// NOTE(review): mu is held by Transaction.End while evicting
	// compressedSpan; which of the other fields it guards is not visible
	// in this file -- confirm against the span code.
	links             []SpanLink
	mu                sync.Mutex
	errorCaptured     bool
	spansCreated      int
	spansDropped      int
	childrenTimer     childrenTimer
	spanTimings       spanTimingsMap
	droppedSpansStats droppedSpanTimingsMap
	rand              *rand.Rand // for ID generation

	// compressedSpan is the transaction's compressed span cache; any
	// cached span is evicted and ended by Transaction.End.
	compressedSpan compressedSpan
}
   417  
   418  // reset resets the TransactionData back to its zero state and places it back
   419  // into the transaction pool.
   420  func (td *TransactionData) reset(tracer *Tracer) {
   421  	*td = TransactionData{
   422  		Context:           td.Context,
   423  		Duration:          -1,
   424  		rand:              td.rand,
   425  		spanTimings:       td.spanTimings,
   426  		droppedSpansStats: td.droppedSpansStats,
   427  	}
   428  	td.Context.reset()
   429  	td.spanTimings.reset()
   430  	td.droppedSpansStats.reset()
   431  	tracer.transactionDataPool.Put(td)
   432  }
   433  
// droppedSpanTimingsKey is the key type of droppedSpanTimingsMap. Dropped
// spans that share the same service target type and name, destination and
// outcome are accumulated under a single key.
type droppedSpanTimingsKey struct {
	serviceTargetType string
	serviceTargetName string
	destination       string
	outcome           string
}
   440  
// droppedSpanTimingsMap records span timings for groups of dropped spans,
// keyed by droppedSpanTimingsKey. Its add method stops admitting new groups
// once the map holds maxDroppedSpanStats of them.
type droppedSpanTimingsMap map[droppedSpanTimingsKey]spanTiming
   443  
   444  // add accumulates the timing for a {destination, outcome} pair, silently drops
   445  // any pairs that would cause the map to exceed the maxDroppedSpanStats.
   446  func (m droppedSpanTimingsMap) add(targetType, targetName, dst, outcome string, count int, d time.Duration) {
   447  	k := droppedSpanTimingsKey{
   448  		serviceTargetType: targetType,
   449  		serviceTargetName: targetName,
   450  		destination:       dst,
   451  		outcome:           outcome,
   452  	}
   453  	timing, ok := m[k]
   454  	if ok || maxDroppedSpanStats > len(m) {
   455  		timing.count += uint64(count)
   456  		timing.duration += int64(d)
   457  		m[k] = timing
   458  	}
   459  }
   460  
   461  // reset resets m back to its initial zero state.
   462  func (m droppedSpanTimingsMap) reset() {
   463  	for k := range m {
   464  		delete(m, k)
   465  	}
   466  }