github.com/waldiirawan/apm-agent-go/v2@v2.2.2/tracer.go (about)

     1  // Licensed to Elasticsearch B.V. under one or more contributor
     2  // license agreements. See the NOTICE file distributed with
     3  // this work for additional information regarding copyright
     4  // ownership. Elasticsearch B.V. licenses this file to you under
     5  // the Apache License, Version 2.0 (the "License"); you may
     6  // not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing,
    12  // software distributed under the License is distributed on an
    13  // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    14  // KIND, either express or implied.  See the License for the
    15  // specific language governing permissions and limitations
    16  // under the License.
    17  
    18  package apm // import "github.com/waldiirawan/apm-agent-go/v2"
    19  
    20  import (
    21  	"bytes"
    22  	"compress/zlib"
    23  	"context"
    24  	"io"
    25  	"log"
    26  	"math/rand"
    27  	"strings"
    28  	"sync"
    29  	"sync/atomic"
    30  	"time"
    31  
    32  	"github.com/waldiirawan/apm-agent-go/v2/apmconfig"
    33  	"github.com/waldiirawan/apm-agent-go/v2/internal/apmlog"
    34  	"github.com/waldiirawan/apm-agent-go/v2/internal/configutil"
    35  	"github.com/waldiirawan/apm-agent-go/v2/internal/iochan"
    36  	"github.com/waldiirawan/apm-agent-go/v2/internal/ringbuffer"
    37  	"github.com/waldiirawan/apm-agent-go/v2/internal/wildcard"
    38  	"github.com/waldiirawan/apm-agent-go/v2/model"
    39  	"github.com/waldiirawan/apm-agent-go/v2/transport"
    40  	"go.elastic.co/fastjson"
    41  )
    42  
    43  const (
    44  	gracePeriodJitter     = 0.1 // +/- 10%
    45  	tracerEventChannelCap = 1000
    46  )
    47  
    48  var (
    49  	tracerMu      sync.RWMutex
    50  	defaultTracer *Tracer
    51  )
    52  
    53  // DefaultTracer returns the default global Tracer, set the first time the
    54  // function is called, or after calling SetDefaultTracer(nil).
    55  //
    56  // The default tracer is configured via environment variables, and will always
    57  // be non-nil. If any of the environment variables are invalid, the
    58  // corresponding errors will be logged to stderr and the default values will be
    59  // used instead.
    60  func DefaultTracer() *Tracer {
    61  	tracerMu.RLock()
    62  	if defaultTracer != nil {
    63  		tracer := defaultTracer
    64  		tracerMu.RUnlock()
    65  		return tracer
    66  	}
    67  	tracerMu.RUnlock()
    68  
    69  	tracerMu.Lock()
    70  	defer tracerMu.Unlock()
    71  	if defaultTracer != nil {
    72  		return defaultTracer
    73  	}
    74  
    75  	var opts TracerOptions
    76  	opts.initDefaults(true)
    77  	defaultTracer = newTracer(opts)
    78  	return defaultTracer
    79  }
    80  
    81  // SetDefaultTracer sets the tracer returned by DefaultTracer.
    82  //
    83  // If a default tracer has already been initialized, it is closed.
    84  // Any queued events are not flushed; it is the responsibility of the
    85  // caller to call the default tracer's Flush method first, if needed.
    86  //
    87  // Calling SetDefaultTracer(nil) will clear the default tracer,
    88  // causing DefaultTracer to initialize a new default tracer.
    89  func SetDefaultTracer(t *Tracer) {
    90  	tracerMu.Lock()
    91  	defer tracerMu.Unlock()
    92  
    93  	if defaultTracer != nil {
    94  		defaultTracer.Close()
    95  	}
    96  	defaultTracer = t
    97  }
    98  
    99  // TracerOptions holds initial tracer options, for passing to NewTracerOptions.
   100  type TracerOptions struct {
   101  	// ServiceName holds the service name.
   102  	//
   103  	// If ServiceName is empty, the service name will be defined using the
   104  	// ELASTIC_APM_SERVICE_NAME environment variable, or if that is not set,
   105  	// the executable name.
   106  	ServiceName string
   107  
   108  	// ServiceVersion holds the service version.
   109  	//
   110  	// If ServiceVersion is empty, the service version will be defined using
   111  	// the ELASTIC_APM_SERVICE_VERSION environment variable.
   112  	ServiceVersion string
   113  
   114  	// ServiceEnvironment holds the service environment.
   115  	//
   116  	// If ServiceEnvironment is empty, the service environment will be defined
   117  	// using the ELASTIC_APM_ENVIRONMENT environment variable.
   118  	ServiceEnvironment string
   119  
   120  	// Transport holds the transport to use for sending events.
   121  	//
   122  	// If Transport is nil, a new HTTP transport will be created from environment
   123  	// variables.
   124  	//
   125  	// If Transport implements apmconfig.Watcher, the tracer will begin watching
   126  	// for remote changes immediately. This behaviour can be disabled by setting
   127  	// the environment variable ELASTIC_APM_CENTRAL_CONFIG=false.
   128  	// If Transport implements the interface below, the tracer will query the
   129  	// APM Server "/" endpoint to obtain the remote major version. Implementers
   130  	// of this interface must cache the remote server version and only refresh
   131  	// on subsequent calls that have `refreshStale` set to true. Implementations
   132  	// must be concurrently safe.
   133  	//   MajorServerVersion(ctx context.Context, refreshStale bool) uint32
   134  	Transport transport.Transport
   135  
   136  	requestDuration           time.Duration
   137  	metricsInterval           time.Duration
   138  	maxSpans                  int
   139  	requestSize               int
   140  	bufferSize                int
   141  	metricsBufferSize         int
   142  	sampler                   Sampler
   143  	sanitizedFieldNames       wildcard.Matchers
   144  	disabledMetrics           wildcard.Matchers
   145  	ignoreTransactionURLs     wildcard.Matchers
   146  	continuationStrategy      string
   147  	captureHeaders            bool
   148  	captureBody               CaptureBodyMode
   149  	spanStackTraceMinDuration time.Duration
   150  	stackTraceLimit           int
   151  	active                    bool
   152  	recording                 bool
   153  	configWatcher             apmconfig.Watcher
   154  	breakdownMetrics          bool
   155  	propagateLegacyHeader     bool
   156  	profileSender             profileSender
   157  	versionGetter             majorVersionGetter
   158  	cpuProfileInterval        time.Duration
   159  	cpuProfileDuration        time.Duration
   160  	heapProfileInterval       time.Duration
   161  	exitSpanMinDuration       time.Duration
   162  	compressionOptions        compressionOptions
   163  	globalLabels              model.StringMap
   164  }
   165  
   166  // initDefaults updates opts with default values.
   167  func (opts *TracerOptions) initDefaults(continueOnError bool) error {
   168  	var errs []error
   169  	failed := func(err error) bool {
   170  		if err == nil {
   171  			return false
   172  		}
   173  		errs = append(errs, err)
   174  		return true
   175  	}
   176  
   177  	requestDuration, err := initialRequestDuration()
   178  	if failed(err) {
   179  		requestDuration = defaultAPIRequestTime
   180  	}
   181  
   182  	metricsInterval, err := initialMetricsInterval()
   183  	if err != nil {
   184  		metricsInterval = defaultMetricsInterval
   185  		errs = append(errs, err)
   186  	}
   187  
   188  	requestSize, err := initialAPIRequestSize()
   189  	if err != nil {
   190  		requestSize = int(defaultAPIRequestSize)
   191  		errs = append(errs, err)
   192  	}
   193  
   194  	bufferSize, err := initialAPIBufferSize()
   195  	if err != nil {
   196  		bufferSize = int(defaultAPIBufferSize)
   197  		errs = append(errs, err)
   198  	}
   199  
   200  	metricsBufferSize, err := initialMetricsBufferSize()
   201  	if err != nil {
   202  		metricsBufferSize = int(defaultMetricsBufferSize)
   203  		errs = append(errs, err)
   204  	}
   205  
   206  	maxSpans, err := initialMaxSpans()
   207  	if failed(err) {
   208  		maxSpans = defaultMaxSpans
   209  	}
   210  
   211  	spanCompressionEnabled, err := initialSpanCompressionEnabled()
   212  	if failed(err) {
   213  		spanCompressionEnabled = defaultSpanCompressionEnabled
   214  	}
   215  
   216  	spanCompressionExactMatchMaxDuration, err := initialSpanCompressionExactMatchMaxDuration()
   217  	if failed(err) {
   218  		spanCompressionExactMatchMaxDuration = defaultSpanCompressionExactMatchMaxDuration
   219  	}
   220  
   221  	spanCompressionSameKindMaxDuration, err := initialSpanCompressionSameKindMaxDuration()
   222  	if failed(err) {
   223  		spanCompressionSameKindMaxDuration = defaultSpanCompressionSameKindMaxDuration
   224  	}
   225  
   226  	sampler, err := initialSampler()
   227  	if failed(err) {
   228  		sampler = nil
   229  	}
   230  
   231  	captureHeaders, err := initialCaptureHeaders()
   232  	if failed(err) {
   233  		captureHeaders = defaultCaptureHeaders
   234  	}
   235  
   236  	captureBody, err := initialCaptureBody()
   237  	if failed(err) {
   238  		captureBody = CaptureBodyOff
   239  	}
   240  
   241  	spanStackTraceMinDuration, err := initialSpanStackTraceMinDuration()
   242  	if failed(err) {
   243  		spanStackTraceMinDuration = defaultSpanStackTraceMinDuration
   244  	}
   245  
   246  	stackTraceLimit, err := initialStackTraceLimit()
   247  	if failed(err) {
   248  		stackTraceLimit = defaultStackTraceLimit
   249  	}
   250  
   251  	active, err := initialActive()
   252  	if failed(err) {
   253  		active = true
   254  	}
   255  
   256  	recording, err := initialRecording()
   257  	if failed(err) {
   258  		recording = true
   259  	}
   260  
   261  	centralConfigEnabled, err := initialCentralConfigEnabled()
   262  	if failed(err) {
   263  		centralConfigEnabled = true
   264  	}
   265  
   266  	breakdownMetricsEnabled, err := initialBreakdownMetricsEnabled()
   267  	if failed(err) {
   268  		breakdownMetricsEnabled = true
   269  	}
   270  
   271  	propagateLegacyHeader, err := initialUseElasticTraceparentHeader()
   272  	if failed(err) {
   273  		propagateLegacyHeader = true
   274  	}
   275  
   276  	cpuProfileInterval, cpuProfileDuration, err := initialCPUProfileIntervalDuration()
   277  	if failed(err) {
   278  		cpuProfileInterval = 0
   279  		cpuProfileDuration = 0
   280  	}
   281  	heapProfileInterval, err := initialHeapProfileInterval()
   282  	if failed(err) {
   283  		heapProfileInterval = 0
   284  	}
   285  
   286  	exitSpanMinDuration, err := initialExitSpanMinDuration()
   287  	if failed(err) {
   288  		exitSpanMinDuration = defaultExitSpanMinDuration
   289  	}
   290  
   291  	continuationStrategy, err := initContinuationStrategy()
   292  	if failed(err) {
   293  		continuationStrategy = defaultContinuationStrategy
   294  	}
   295  
   296  	if opts.ServiceName != "" {
   297  		err := validateServiceName(opts.ServiceName)
   298  		if failed(err) {
   299  			opts.ServiceName = ""
   300  		}
   301  	}
   302  
   303  	serviceName, serviceVersion, serviceEnvironment := initialService()
   304  	if opts.ServiceName == "" {
   305  		opts.ServiceName = serviceName
   306  	}
   307  	if opts.ServiceVersion == "" {
   308  		opts.ServiceVersion = serviceVersion
   309  	}
   310  	if opts.ServiceEnvironment == "" {
   311  		opts.ServiceEnvironment = serviceEnvironment
   312  	}
   313  
   314  	if opts.Transport == nil {
   315  		initialTransport, err := initialTransport(opts.ServiceName, opts.ServiceVersion)
   316  		if failed(err) {
   317  			opts.Transport = transport.NewDiscardTransport(err)
   318  		} else {
   319  			opts.Transport = initialTransport
   320  		}
   321  	}
   322  
   323  	if len(errs) != 0 && !continueOnError {
   324  		return errs[0]
   325  	}
   326  	for _, err := range errs {
   327  		log.Printf("[apm]: %s", err)
   328  	}
   329  
   330  	opts.globalLabels = parseGlobalLabels()
   331  	opts.requestDuration = requestDuration
   332  	opts.metricsInterval = metricsInterval
   333  	opts.requestSize = requestSize
   334  	opts.bufferSize = bufferSize
   335  	opts.metricsBufferSize = metricsBufferSize
   336  	opts.maxSpans = maxSpans
   337  	opts.compressionOptions = compressionOptions{
   338  		enabled:               spanCompressionEnabled,
   339  		exactMatchMaxDuration: spanCompressionExactMatchMaxDuration,
   340  		sameKindMaxDuration:   spanCompressionSameKindMaxDuration,
   341  	}
   342  	opts.sampler = sampler
   343  	opts.sanitizedFieldNames = initialSanitizedFieldNames()
   344  	opts.disabledMetrics = initialDisabledMetrics()
   345  	opts.ignoreTransactionURLs = initialIgnoreTransactionURLs()
   346  	opts.breakdownMetrics = breakdownMetricsEnabled
   347  	opts.captureHeaders = captureHeaders
   348  	opts.captureBody = captureBody
   349  	opts.spanStackTraceMinDuration = spanStackTraceMinDuration
   350  	opts.stackTraceLimit = stackTraceLimit
   351  	opts.active = active
   352  	opts.recording = recording
   353  	opts.propagateLegacyHeader = propagateLegacyHeader
   354  	opts.exitSpanMinDuration = exitSpanMinDuration
   355  	opts.continuationStrategy = continuationStrategy
   356  	if centralConfigEnabled {
   357  		if cw, ok := opts.Transport.(apmconfig.Watcher); ok {
   358  			opts.configWatcher = cw
   359  		}
   360  	}
   361  	if ps, ok := opts.Transport.(profileSender); ok {
   362  		opts.profileSender = ps
   363  		opts.cpuProfileInterval = cpuProfileInterval
   364  		opts.cpuProfileDuration = cpuProfileDuration
   365  		opts.heapProfileInterval = heapProfileInterval
   366  	}
   367  	if vg, ok := opts.Transport.(majorVersionGetter); ok {
   368  		opts.versionGetter = vg
   369  	}
   370  	return nil
   371  }
   372  
   373  type compressionOptions struct {
   374  	enabled               bool
   375  	exactMatchMaxDuration time.Duration
   376  	sameKindMaxDuration   time.Duration
   377  }
   378  
   379  // Tracer manages the sampling and sending of transactions to
   380  // Elastic APM.
   381  //
   382  // Transactions are buffered until they are flushed (forcibly
   383  // with a Flush call, or when the flush timer expires), or when
   384  // the maximum transaction queue size is reached. Failure to
   385  // send will be periodically retried. Once the queue limit has
   386  // been reached, new transactions will replace older ones in
   387  // the queue.
   388  //
   389  // Errors are sent as soon as possible, but will buffered and
   390  // later sent in bulk if the tracer is busy, or otherwise cannot
   391  // send to the server, e.g. due to network failure. There is
   392  // a limit to the number of errors that will be buffered, and
   393  // once that limit has been reached, new errors will be dropped
   394  // until the queue is drained.
   395  type Tracer struct {
   396  	transport         transport.Transport
   397  	service           model.Service
   398  	process           *model.Process
   399  	system            *model.System
   400  	active            int32
   401  	bufferSize        int
   402  	metricsBufferSize int
   403  	closing           chan struct{}
   404  	closed            chan struct{}
   405  	forceFlush        chan chan<- struct{}
   406  	forceSendMetrics  chan chan<- struct{}
   407  	configCommands    chan tracerConfigCommand
   408  	configWatcher     chan apmconfig.Watcher
   409  	events            chan tracerEvent
   410  	breakdownMetrics  *breakdownMetrics
   411  	profileSender     profileSender
   412  	versionGetter     majorVersionGetter
   413  	globalLabels      model.StringMap
   414  
   415  	// stats is heap-allocated to ensure correct alignment for atomic access.
   416  	stats *TracerStats
   417  
   418  	// instrumentationConfig_ must only be accessed and mutated
   419  	// using Tracer.instrumentationConfig() and Tracer.setInstrumentationConfig().
   420  	instrumentationConfigInternal *instrumentationConfig
   421  
   422  	errorDataPool       sync.Pool
   423  	spanDataPool        sync.Pool
   424  	transactionDataPool sync.Pool
   425  }
   426  
   427  // NewTracer returns a new Tracer, using the default transport,
   428  // and with the specified service name and version if specified.
   429  // This is equivalent to calling NewTracerOptions with a
   430  // TracerOptions having ServiceName and ServiceVersion set to
   431  // the provided arguments.
   432  func NewTracer(serviceName, serviceVersion string) (*Tracer, error) {
   433  	return NewTracerOptions(TracerOptions{
   434  		ServiceName:    serviceName,
   435  		ServiceVersion: serviceVersion,
   436  	})
   437  }
   438  
   439  // NewTracerOptions returns a new Tracer using the provided options.
   440  // See TracerOptions for details on the options, and their default
   441  // values.
   442  func NewTracerOptions(opts TracerOptions) (*Tracer, error) {
   443  	if err := opts.initDefaults(false); err != nil {
   444  		return nil, err
   445  	}
   446  	return newTracer(opts), nil
   447  }
   448  
   449  func newTracer(opts TracerOptions) *Tracer {
   450  	t := &Tracer{
   451  		transport: opts.Transport,
   452  		service: makeService(
   453  			opts.ServiceName,
   454  			opts.ServiceVersion,
   455  			opts.ServiceEnvironment,
   456  		),
   457  		process:           &currentProcess,
   458  		system:            &localSystem,
   459  		closing:           make(chan struct{}),
   460  		closed:            make(chan struct{}),
   461  		forceFlush:        make(chan chan<- struct{}),
   462  		forceSendMetrics:  make(chan chan<- struct{}),
   463  		configCommands:    make(chan tracerConfigCommand),
   464  		configWatcher:     make(chan apmconfig.Watcher),
   465  		events:            make(chan tracerEvent, tracerEventChannelCap),
   466  		active:            1,
   467  		breakdownMetrics:  newBreakdownMetrics(),
   468  		stats:             &TracerStats{},
   469  		bufferSize:        opts.bufferSize,
   470  		metricsBufferSize: opts.metricsBufferSize,
   471  		profileSender:     opts.profileSender,
   472  		versionGetter:     opts.versionGetter,
   473  		instrumentationConfigInternal: &instrumentationConfig{
   474  			local: make(map[string]func(*instrumentationConfigValues)),
   475  		},
   476  		globalLabels: opts.globalLabels,
   477  	}
   478  	t.breakdownMetrics.enabled = opts.breakdownMetrics
   479  	// Initialise local transaction config.
   480  	t.setLocalInstrumentationConfig(envRecording, func(cfg *instrumentationConfigValues) {
   481  		cfg.recording = opts.recording
   482  	})
   483  	t.setLocalInstrumentationConfig(envCaptureBody, func(cfg *instrumentationConfigValues) {
   484  		cfg.captureBody = opts.captureBody
   485  	})
   486  	t.setLocalInstrumentationConfig(envCaptureHeaders, func(cfg *instrumentationConfigValues) {
   487  		cfg.captureHeaders = opts.captureHeaders
   488  	})
   489  	t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) {
   490  		cfg.maxSpans = opts.maxSpans
   491  	})
   492  	t.setLocalInstrumentationConfig(envSpanCompressionEnabled, func(cfg *instrumentationConfigValues) {
   493  		cfg.compressionOptions.enabled = opts.compressionOptions.enabled
   494  	})
   495  	t.setLocalInstrumentationConfig(envSpanCompressionExactMatchMaxDuration, func(cfg *instrumentationConfigValues) {
   496  		cfg.compressionOptions.exactMatchMaxDuration = opts.compressionOptions.exactMatchMaxDuration
   497  	})
   498  	t.setLocalInstrumentationConfig(envSpanCompressionSameKindMaxDuration, func(cfg *instrumentationConfigValues) {
   499  		cfg.compressionOptions.sameKindMaxDuration = opts.compressionOptions.sameKindMaxDuration
   500  	})
   501  	t.setLocalInstrumentationConfig(envTransactionSampleRate, func(cfg *instrumentationConfigValues) {
   502  		cfg.sampler = opts.sampler
   503  	})
   504  	t.setLocalInstrumentationConfig(envSpanStackTraceMinDuration, func(cfg *instrumentationConfigValues) {
   505  		cfg.spanStackTraceMinDuration = opts.spanStackTraceMinDuration
   506  	})
   507  	t.setLocalInstrumentationConfig(envStackTraceLimit, func(cfg *instrumentationConfigValues) {
   508  		cfg.stackTraceLimit = opts.stackTraceLimit
   509  	})
   510  	t.setLocalInstrumentationConfig(envUseElasticTraceparentHeader, func(cfg *instrumentationConfigValues) {
   511  		cfg.propagateLegacyHeader = opts.propagateLegacyHeader
   512  	})
   513  	t.setLocalInstrumentationConfig(envSanitizeFieldNames, func(cfg *instrumentationConfigValues) {
   514  		cfg.sanitizedFieldNames = opts.sanitizedFieldNames
   515  	})
   516  	t.setLocalInstrumentationConfig(envIgnoreURLs, func(cfg *instrumentationConfigValues) {
   517  		cfg.ignoreTransactionURLs = opts.ignoreTransactionURLs
   518  	})
   519  	t.setLocalInstrumentationConfig(envExitSpanMinDuration, func(cfg *instrumentationConfigValues) {
   520  		cfg.exitSpanMinDuration = opts.exitSpanMinDuration
   521  	})
   522  	t.setLocalInstrumentationConfig(envContinuationStrategy, func(cfg *instrumentationConfigValues) {
   523  		cfg.continuationStrategy = opts.continuationStrategy
   524  	})
   525  	if logger := apmlog.DefaultLogger(); logger != nil {
   526  		defaultLogLevel := logger.Level()
   527  		t.setLocalInstrumentationConfig(apmlog.EnvLogLevel, func(cfg *instrumentationConfigValues) {
   528  			// Revert to the original, local, log level when
   529  			// the centrally defined log level is removed.
   530  			logger.SetLevel(defaultLogLevel)
   531  		})
   532  	}
   533  
   534  	if !opts.active {
   535  		t.active = 0
   536  		close(t.closed)
   537  		return t
   538  	}
   539  
   540  	go t.loop()
   541  	t.configCommands <- func(cfg *tracerConfig) {
   542  		cfg.recording = opts.recording
   543  		cfg.cpuProfileInterval = opts.cpuProfileInterval
   544  		cfg.cpuProfileDuration = opts.cpuProfileDuration
   545  		cfg.heapProfileInterval = opts.heapProfileInterval
   546  		cfg.metricsInterval = opts.metricsInterval
   547  		cfg.requestDuration = opts.requestDuration
   548  		cfg.requestSize = opts.requestSize
   549  		cfg.disabledMetrics = opts.disabledMetrics
   550  		cfg.metricsGatherers = []MetricsGatherer{newBuiltinMetricsGatherer(t)}
   551  		if logger := apmlog.DefaultLogger(); logger != nil {
   552  			cfg.logger = logger
   553  		}
   554  	}
   555  	if opts.configWatcher != nil {
   556  		t.configWatcher <- opts.configWatcher
   557  	}
   558  	return t
   559  }
   560  
   561  // tracerConfig holds the tracer's runtime configuration, which may be modified
   562  // by sending a tracerConfigCommand to the tracer's configCommands channel.
   563  type tracerConfig struct {
   564  	recording           bool
   565  	requestSize         int
   566  	requestDuration     time.Duration
   567  	metricsInterval     time.Duration
   568  	logger              Logger
   569  	metricsGatherers    []MetricsGatherer
   570  	disabledMetrics     wildcard.Matchers
   571  	cpuProfileDuration  time.Duration
   572  	cpuProfileInterval  time.Duration
   573  	heapProfileInterval time.Duration
   574  }
   575  
   576  type tracerConfigCommand func(*tracerConfig)
   577  
   578  // Close closes the Tracer, preventing transactions from being
   579  // sent to the APM server.
   580  func (t *Tracer) Close() {
   581  	select {
   582  	case <-t.closing:
   583  	default:
   584  		close(t.closing)
   585  	}
   586  	<-t.closed
   587  }
   588  
   589  // Flush waits for the Tracer to flush any transactions and errors it currently
   590  // has queued to the APM server, the tracer is stopped, or the abort channel
   591  // is signaled.
   592  func (t *Tracer) Flush(abort <-chan struct{}) {
   593  	flushed := make(chan struct{}, 1)
   594  	select {
   595  	case t.forceFlush <- flushed:
   596  		select {
   597  		case <-abort:
   598  		case <-flushed:
   599  		case <-t.closed:
   600  		}
   601  	case <-t.closed:
   602  	}
   603  }
   604  
   605  // Recording reports whether the tracer is recording events. Instrumentation
   606  // may use this to avoid creating transactions, spans, and metrics when the
   607  // tracer is configured to not record.
   608  //
   609  // Recording will also return false if the tracer is inactive.
   610  func (t *Tracer) Recording() bool {
   611  	return t.instrumentationConfig().recording && t.Active()
   612  }
   613  
   614  // Active reports whether the tracer is active. If the tracer is inactive,
   615  // no transactions or errors will be sent to the Elastic APM server.
   616  func (t *Tracer) Active() bool {
   617  	return atomic.LoadInt32(&t.active) == 1
   618  }
   619  
   620  // ShouldPropagateLegacyHeader reports whether instrumentation should
   621  // propagate the legacy "Elastic-Apm-Traceparent" header in addition to
   622  // the standard W3C "traceparent" header.
   623  //
   624  // This method will be removed in a future major version when we remove
   625  // support for propagating the legacy header.
   626  func (t *Tracer) ShouldPropagateLegacyHeader() bool {
   627  	return t.instrumentationConfig().propagateLegacyHeader
   628  }
   629  
   630  // SetRequestDuration sets the maximum amount of time to keep a request open
   631  // to the APM server for streaming data before closing the stream and starting
   632  // a new request.
   633  func (t *Tracer) SetRequestDuration(d time.Duration) {
   634  	t.sendConfigCommand(func(cfg *tracerConfig) {
   635  		cfg.requestDuration = d
   636  	})
   637  }
   638  
   639  // SetMetricsInterval sets the metrics interval -- the amount of time in
   640  // between metrics samples being gathered.
   641  func (t *Tracer) SetMetricsInterval(d time.Duration) {
   642  	t.sendConfigCommand(func(cfg *tracerConfig) {
   643  		cfg.metricsInterval = d
   644  	})
   645  }
   646  
   647  // SetLogger sets the Logger to be used for logging the operation of
   648  // the tracer.
   649  //
   650  // The tracer is initialized with a default logger configured with the
   651  // environment variables ELASTIC_APM_LOG_FILE and ELASTIC_APM_LOG_LEVEL.
   652  // Calling SetLogger will replace the default logger.
   653  func (t *Tracer) SetLogger(logger Logger) {
   654  	t.sendConfigCommand(func(cfg *tracerConfig) {
   655  		cfg.logger = logger
   656  	})
   657  }
   658  
   659  // SetSanitizedFieldNames sets the wildcard patterns that will be used to
   660  // match cookie and form field names for sanitization. Fields matching any
   661  // of the the supplied patterns will have their values redacted. If
   662  // SetSanitizedFieldNames is called with no arguments, then no fields
   663  // will be redacted.
   664  //
   665  // Configuration via Kibana takes precedence over local configuration, so
   666  // if sanitized_field_names has been configured via Kibana, this call will
   667  // not have any effect until/unless that configuration has been removed.
   668  func (t *Tracer) SetSanitizedFieldNames(patterns ...string) error {
   669  	var matchers wildcard.Matchers
   670  	if len(patterns) != 0 {
   671  		matchers = make(wildcard.Matchers, len(patterns))
   672  		for i, p := range patterns {
   673  			matchers[i] = configutil.ParseWildcardPattern(p)
   674  		}
   675  	}
   676  	t.setLocalInstrumentationConfig(envSanitizeFieldNames, func(cfg *instrumentationConfigValues) {
   677  		cfg.sanitizedFieldNames = matchers
   678  	})
   679  	return nil
   680  }
   681  
   682  // SetIgnoreTransactionURLs sets the wildcard patterns that will be used to
   683  // ignore transactions with matching URLs.
   684  func (t *Tracer) SetIgnoreTransactionURLs(pattern string) error {
   685  	t.setLocalInstrumentationConfig(envIgnoreURLs, func(cfg *instrumentationConfigValues) {
   686  		cfg.ignoreTransactionURLs = configutil.ParseWildcardPatterns(pattern)
   687  	})
   688  	return nil
   689  }
   690  
   691  // RegisterMetricsGatherer registers g for periodic (or forced) metrics
   692  // gathering by t.
   693  //
   694  // RegisterMetricsGatherer returns a function which will deregister g.
   695  // It may safely be called multiple times.
   696  func (t *Tracer) RegisterMetricsGatherer(g MetricsGatherer) func() {
   697  	// Wrap g in a pointer-to-struct, so we can safely compare.
   698  	wrapped := &struct{ MetricsGatherer }{MetricsGatherer: g}
   699  	t.sendConfigCommand(func(cfg *tracerConfig) {
   700  		cfg.metricsGatherers = append(cfg.metricsGatherers, wrapped)
   701  	})
   702  	deregister := func(cfg *tracerConfig) {
   703  		for i, g := range cfg.metricsGatherers {
   704  			if g != wrapped {
   705  				continue
   706  			}
   707  			cfg.metricsGatherers = append(cfg.metricsGatherers[:i], cfg.metricsGatherers[i+1:]...)
   708  		}
   709  	}
   710  	var once sync.Once
   711  	return func() {
   712  		once.Do(func() {
   713  			t.sendConfigCommand(deregister)
   714  		})
   715  	}
   716  }
   717  
   718  // SetConfigWatcher sets w as the config watcher.
   719  //
   720  // By default, the tracer will be configured to use the transport for
   721  // watching config, if the transport implements apmconfig.Watcher. This
   722  // can be overridden by calling SetConfigWatcher.
   723  //
   724  // If w is nil, config watching will be stopped.
   725  //
   726  // Calling SetConfigWatcher will discard any previously observed remote
   727  // config, reverting to local config until a config change from w is
   728  // observed.
   729  func (t *Tracer) SetConfigWatcher(w apmconfig.Watcher) {
   730  	select {
   731  	case t.configWatcher <- w:
   732  	case <-t.closing:
   733  	case <-t.closed:
   734  	}
   735  }
   736  
   737  func (t *Tracer) sendConfigCommand(cmd tracerConfigCommand) {
   738  	select {
   739  	case t.configCommands <- cmd:
   740  	case <-t.closing:
   741  	case <-t.closed:
   742  	}
   743  }
   744  
   745  // SetRecording enables or disables recording of future events.
   746  //
   747  // SetRecording does not affect in-flight events.
   748  func (t *Tracer) SetRecording(r bool) {
   749  	t.setLocalInstrumentationConfig(envRecording, func(cfg *instrumentationConfigValues) {
   750  		// Update instrumentation config to disable transactions and errors.
   751  		cfg.recording = r
   752  	})
   753  	t.sendConfigCommand(func(cfg *tracerConfig) {
   754  		// Consult t.instrumentationConfig() as local config may not be in effect,
   755  		// or there may have been a concurrent change to instrumentation config.
   756  		cfg.recording = t.instrumentationConfig().recording
   757  	})
   758  }
   759  
   760  // SetSampler sets the sampler the tracer.
   761  //
   762  // It is valid to pass nil, in which case all transactions will be sampled.
   763  //
   764  // Configuration via Kibana takes precedence over local configuration, so
   765  // if sampling has been configured via Kibana, this call will not have any
   766  // effect until/unless that configuration has been removed.
   767  func (t *Tracer) SetSampler(s Sampler) {
   768  	t.setLocalInstrumentationConfig(envTransactionSampleRate, func(cfg *instrumentationConfigValues) {
   769  		cfg.sampler = s
   770  	})
   771  }
   772  
   773  // SetMaxSpans sets the maximum number of spans that will be added
   774  // to a transaction before dropping spans.
   775  //
   776  // Passing in zero will disable all spans, while negative values will
   777  // permit an unlimited number of spans.
   778  func (t *Tracer) SetMaxSpans(n int) {
   779  	t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) {
   780  		cfg.maxSpans = n
   781  	})
   782  }
   783  
   784  // SetSpanCompressionEnabled enables/disables the span compression feature.
   785  func (t *Tracer) SetSpanCompressionEnabled(v bool) {
   786  	t.setLocalInstrumentationConfig(envSpanCompressionEnabled, func(cfg *instrumentationConfigValues) {
   787  		cfg.compressionOptions.enabled = v
   788  	})
   789  }
   790  
   791  // SetSpanCompressionExactMatchMaxDuration sets the maximum duration for a span
   792  // to be compressed with `compression_strategy` == `exact_match`.
   793  func (t *Tracer) SetSpanCompressionExactMatchMaxDuration(v time.Duration) {
   794  	t.setLocalInstrumentationConfig(envSpanCompressionExactMatchMaxDuration, func(cfg *instrumentationConfigValues) {
   795  		cfg.compressionOptions.exactMatchMaxDuration = v
   796  	})
   797  }
   798  
   799  // SetSpanCompressionSameKindMaxDuration sets the maximum duration for a span
   800  // to be compressed with `compression_strategy` == `same_kind`.
   801  func (t *Tracer) SetSpanCompressionSameKindMaxDuration(v time.Duration) {
   802  	t.setLocalInstrumentationConfig(envSpanCompressionSameKindMaxDuration, func(cfg *instrumentationConfigValues) {
   803  		cfg.compressionOptions.sameKindMaxDuration = v
   804  	})
   805  }
   806  
   807  // SetSpanStackTraceMinDuration sets the minimum duration for a span after which
   808  // we will capture its stack frames.
   809  func (t *Tracer) SetSpanStackTraceMinDuration(d time.Duration) {
   810  	t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) {
   811  		cfg.spanStackTraceMinDuration = d
   812  	})
   813  }
   814  
   815  // SetStackTraceLimit sets the the maximum number of stack frames to collect
   816  // for each stack trace. If limit is negative, then all frames will be collected.
   817  func (t *Tracer) SetStackTraceLimit(limit int) {
   818  	t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) {
   819  		cfg.stackTraceLimit = limit
   820  	})
   821  }
   822  
   823  // SetCaptureHeaders enables or disables capturing of HTTP headers.
   824  func (t *Tracer) SetCaptureHeaders(capture bool) {
   825  	t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) {
   826  		cfg.captureHeaders = capture
   827  	})
   828  }
   829  
   830  // SetCaptureBody sets the HTTP request body capture mode.
   831  func (t *Tracer) SetCaptureBody(mode CaptureBodyMode) {
   832  	t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) {
   833  		cfg.captureBody = mode
   834  	})
   835  }
   836  
   837  // SetExitSpanMinDuration sets the minimum duration for an exit span to not be
   838  // dropped.
   839  func (t *Tracer) SetExitSpanMinDuration(v time.Duration) {
   840  	t.setLocalInstrumentationConfig(envExitSpanMinDuration, func(cfg *instrumentationConfigValues) {
   841  		cfg.exitSpanMinDuration = v
   842  	})
   843  }
   844  
   845  // SetContinuationStrategy sets the continuation strategy.
   846  func (t *Tracer) SetContinuationStrategy(v string) {
   847  	t.setLocalInstrumentationConfig(envContinuationStrategy, func(cfg *instrumentationConfigValues) {
   848  		cfg.continuationStrategy = v
   849  	})
   850  }
   851  
   852  // SendMetrics forces the tracer to gather and send metrics immediately,
   853  // blocking until the metrics have been sent or the abort channel is
   854  // signalled.
   855  func (t *Tracer) SendMetrics(abort <-chan struct{}) {
   856  	sent := make(chan struct{}, 1)
   857  	select {
   858  	case t.forceSendMetrics <- sent:
   859  		select {
   860  		case <-abort:
   861  		case <-sent:
   862  		case <-t.closed:
   863  		}
   864  	case <-t.closed:
   865  	}
   866  }
   867  
   868  // Stats returns the current TracerStats. This will return the most
   869  // recent values even after the tracer has been closed.
   870  func (t *Tracer) Stats() TracerStats {
   871  	return t.stats.copy()
   872  }
   873  
// loop is the tracer's event loop. It runs until t.closing is signalled.
//
// The loop multiplexes configuration commands, config watcher changes,
// events (transactions, spans, errors), metrics gathering, profiling timers,
// flush requests, and the state of the in-flight stream request. Events are
// written to ring buffers through modelWriter, then copied through a zlib
// writer into requestBuf, whose bytes are handed to the transport by
// answering read requests received on iochanReader.C. The blocking
// t.transport.SendStream calls are made on a separate goroutine.
//
// On return, t.active is stored as 0 and t.closed is closed.
func (t *Tracer) loop() {
	ctx, cancelContext := context.WithCancel(context.Background())
	// Deferred calls run in reverse order: t.active is cleared first,
	// then t.closed is closed, and finally the context is cancelled.
	defer cancelContext()
	defer close(t.closed)
	defer atomic.StoreInt32(&t.active, 0)

	// req is the most recent read request received from iochanReader.C;
	// req.Buf == nil means there is no read waiting to be answered.
	var req iochan.ReadRequest
	// requestBuf holds the zlib-compressed bytes not yet handed to the reader.
	var requestBuf bytes.Buffer
	// metadata is built lazily, the first time a request is started.
	var metadata []byte
	// gracePeriod is passed to the sender goroutine with each request;
	// the goroutine only waits when the value is positive.
	var gracePeriod time.Duration = -1
	// flushed is set while a forced flush is waiting for the request to finish.
	var flushed chan<- struct{}
	// Counts of the objects written to the current request.
	var requestBufTransactions, requestBufSpans, requestBufErrors, requestBufMetricsets uint64
	// The error is ignored: NewWriterLevel only fails for an invalid
	// compression level, and zlib.BestSpeed is a valid one.
	zlibWriter, _ := zlib.NewWriterLevel(&requestBuf, zlib.BestSpeed)
	zlibFlushed := true
	zlibClosed := false
	iochanReader := iochan.NewReader()
	requestBytesRead := 0
	requestActive := false
	closeRequest := false
	flushRequest := false
	// Buffered so the sender goroutine can deliver a result without
	// blocking on the loop.
	requestResult := make(chan error, 1)
	// Create the request timer in a stopped state, draining the channel
	// if the zero-duration timer has already fired.
	requestTimer := time.NewTimer(0)
	requestTimerActive := false
	if !requestTimer.Stop() {
		<-requestTimer.C
	}

	// Run another goroutine to perform the blocking requests,
	// communicating with the tracer loop to obtain stream data.
	sendStreamRequest := make(chan time.Duration)
	done := make(chan struct{})
	defer func() {
		// Closing sendStreamRequest ends the goroutine's range loop;
		// wait for the goroutine to exit before loop returns.
		close(sendStreamRequest)
		<-done
	}()
	go func() {
		defer close(done)
		jitterRand := rand.New(rand.NewSource(time.Now().UnixNano()))
		// Note: this gracePeriod shadows the one declared in loop; it is
		// the value that was sent on sendStreamRequest.
		for gracePeriod := range sendStreamRequest {
			if gracePeriod > 0 {
				// Wait out the (jittered) grace period before sending,
				// unless the context is cancelled first.
				select {
				case <-time.After(jitterDuration(gracePeriod, jitterRand, gracePeriodJitter)):
				case <-ctx.Done():
				}
			}
			requestResult <- t.transport.SendStream(ctx, iochanReader)
		}
	}()

	refreshServerVersionDeadline := 10 * time.Second
	refreshVersionTicker := time.NewTicker(refreshServerVersionDeadline)
	defer refreshVersionTicker.Stop()
	if t.versionGetter != nil {
		go t.maybeRefreshServerVersion(ctx, refreshServerVersionDeadline)
	} else {
		// If versionGetter is nil, stop the ticker.
		refreshVersionTicker.Stop()
	}

	var breakdownMetricsLimitWarningLogged bool
	// stats accumulates counters locally; they are merged into t.stats
	// whenever non-zero.
	var stats TracerStats
	var metrics Metrics
	// sentMetrics is set while a SendMetrics caller is waiting to be notified.
	var sentMetrics chan<- struct{}
	// gatheringMetrics is true between starting a gather and receiving
	// from gatheredMetrics.
	var gatheringMetrics bool
	// metricsTimerStart is the time at which metricsTimer was last armed;
	// it is reset to the zero value when the timer fires or is stopped.
	var metricsTimerStart time.Time
	metricsBuffer := ringbuffer.New(t.metricsBufferSize)
	gatheredMetrics := make(chan struct{}, 1)
	// Create the metrics timer in a stopped state, as for requestTimer.
	metricsTimer := time.NewTimer(0)
	if !metricsTimer.Stop() {
		<-metricsTimer.C
	}

	var lastConfigChange map[string]string
	var configChanges <-chan apmconfig.Change
	var stopConfigWatcher func()
	defer func() {
		if stopConfigWatcher != nil {
			stopConfigWatcher()
		}
	}()

	cpuProfilingState := newCPUProfilingState(t.profileSender)
	heapProfilingState := newHeapProfilingState(t.profileSender)

	var cfg tracerConfig
	buffer := ringbuffer.New(t.bufferSize)
	// Count blocks evicted from the ring buffer as dropped events.
	buffer.Evicted = func(h ringbuffer.BlockHeader) {
		switch h.Tag {
		case errorBlockTag:
			stats.ErrorsDropped++
		case spanBlockTag:
			stats.SpansDropped++
		case transactionBlockTag:
			stats.TransactionsDropped++
		}
	}
	modelWriter := modelWriter{
		buffer:        buffer,
		metricsBuffer: metricsBuffer,
		cfg:           &cfg,
		stats:         &stats,
	}

	// handleTracerConfigCommand applies cmd to cfg, then updates the
	// profiling state and the metrics timer to match the new config.
	// When cfg.recording is false, all of the intervals are treated as zero.
	handleTracerConfigCommand := func(cmd tracerConfigCommand) {
		var oldMetricsInterval time.Duration
		if cfg.recording {
			oldMetricsInterval = cfg.metricsInterval
		}
		cmd(&cfg)
		var metricsInterval, cpuProfileInterval, cpuProfileDuration, heapProfileInterval time.Duration
		if cfg.recording {
			metricsInterval = cfg.metricsInterval
			cpuProfileInterval = cfg.cpuProfileInterval
			cpuProfileDuration = cfg.cpuProfileDuration
			heapProfileInterval = cfg.heapProfileInterval
		}

		cpuProfilingState.updateConfig(cpuProfileInterval, cpuProfileDuration)
		heapProfilingState.updateConfig(heapProfileInterval, 0)
		// The metrics timer is only adjusted when no gather is in progress
		// and the effective interval has changed.
		if !gatheringMetrics && metricsInterval != oldMetricsInterval {
			if metricsTimerStart.IsZero() {
				// Timer not running: start it if metrics are now enabled.
				if metricsInterval > 0 {
					metricsTimer.Reset(metricsInterval)
					metricsTimerStart = time.Now()
				}
			} else {
				if metricsInterval <= 0 {
					// Metrics disabled: stop the timer and drain its channel.
					metricsTimerStart = time.Time{}
					if !metricsTimer.Stop() {
						<-metricsTimer.C
					}
				} else {
					// Interval changed while the timer is running: rearm it
					// for the remainder of the new interval, or fire
					// immediately if that much time has already passed.
					alreadyPassed := time.Since(metricsTimerStart)
					if alreadyPassed >= metricsInterval {
						metricsTimer.Reset(0)
					} else {
						metricsTimer.Reset(metricsInterval - alreadyPassed)
					}
				}
			}
		}
	}

	for {
		var gatherMetrics bool
		select {
		case <-t.closing:
			cancelContext() // informs transport that EOF is expected
			iochanReader.CloseRead(io.EOF)
			return
		case cmd := <-t.configCommands:
			handleTracerConfigCommand(cmd)
			continue
		case cw := <-t.configWatcher:
			// A new config watcher (possibly nil) replaces any existing one:
			// stop the old watcher and revert the remote config it applied.
			if configChanges != nil {
				stopConfigWatcher()
				t.updateRemoteConfig(cfg.logger, lastConfigChange, nil)
				lastConfigChange = nil
				configChanges = nil
			}
			if cw == nil {
				continue
			}
			var configWatcherContext context.Context
			var watchParams apmconfig.WatchParams
			watchParams.Service.Name = t.service.Name
			watchParams.Service.Environment = t.service.Environment
			configWatcherContext, stopConfigWatcher = context.WithCancel(ctx)
			configChanges = cw.WatchConfig(configWatcherContext, watchParams)
			// Silence go vet's "possible context leak" false positive.
			// We call a previous stopConfigWatcher before reassigning
			// the variable, and we have a defer at the top level of the
			// loop method that will call the final stopConfigWatcher
			// value on method exit.
			_ = stopConfigWatcher
			continue
		case change, ok := <-configChanges:
			if !ok {
				// The watcher closed its channel; stop selecting on it.
				configChanges = nil
				continue
			}
			if change.Err != nil {
				if cfg.logger != nil {
					cfg.logger.Errorf("config request failed: %s", change.Err)
				}
			} else {
				t.updateRemoteConfig(cfg.logger, lastConfigChange, change.Attrs)
				lastConfigChange = change.Attrs
				// Re-sync the loop's recording flag with the instrumentation
				// config after applying the remote change.
				handleTracerConfigCommand(func(cfg *tracerConfig) {
					cfg.recording = t.instrumentationConfig().recording
				})
			}
			continue
		case <-refreshVersionTicker.C:
			go t.maybeRefreshServerVersion(ctx, refreshServerVersionDeadline)
		case event := <-t.events:
			switch event.eventType {
			case transactionEvent:
				if !t.breakdownMetrics.recordTransaction(event.tx.TransactionData) {
					// Log the limit warning at most once.
					if !breakdownMetricsLimitWarningLogged && cfg.logger != nil {
						cfg.logger.Warningf("%s", breakdownMetricsLimitWarning)
						breakdownMetricsLimitWarningLogged = true
					}
				}
				// Drop unsampled transactions when the APM Server is >= 8.0
				drop := t.maybeDropTransaction(
					ctx, event.tx.TransactionData, event.tx.Sampled(),
				)
				if !drop {
					modelWriter.writeTransaction(event.tx.Transaction, event.tx.TransactionData)
				}
			case spanEvent:
				modelWriter.writeSpan(event.span.Span, event.span.SpanData)
			case errorEvent:
				modelWriter.writeError(event.err)
				// Flush the buffer to transmit the error immediately.
				flushRequest = true
			}
		case <-requestTimer.C:
			// The request has been open for cfg.requestDuration: close it.
			requestTimerActive = false
			closeRequest = true
		case <-metricsTimer.C:
			metricsTimerStart = time.Time{}
			// Start gathering unless a gather is already in progress.
			gatherMetrics = !gatheringMetrics
		case sentMetrics = <-t.forceSendMetrics:
			if cfg.recording {
				// Stop the periodic timer if it is running; it is rearmed
				// once the gathered metrics have been written.
				if !metricsTimerStart.IsZero() {
					if !metricsTimer.Stop() {
						<-metricsTimer.C
					}
					metricsTimerStart = time.Time{}
				}
				gatherMetrics = !gatheringMetrics
			}
		case <-gatheredMetrics:
			modelWriter.writeMetrics(&metrics)
			gatheringMetrics = false
			flushRequest = true
			if cfg.recording && cfg.metricsInterval > 0 {
				metricsTimerStart = time.Now()
				metricsTimer.Reset(cfg.metricsInterval)
			}
		case <-cpuProfilingState.timer.C:
			cpuProfilingState.start(ctx, cfg.logger, t.metadataReader())
		case <-cpuProfilingState.finished:
			cpuProfilingState.resetTimer()
		case <-heapProfilingState.timer.C:
			heapProfilingState.start(ctx, cfg.logger, t.metadataReader())
		case <-heapProfilingState.finished:
			heapProfilingState.resetTimer()
		case flushed = <-t.forceFlush:
			// Drain any objects buffered in the channels.
			for n := len(t.events); n > 0; n-- {
				event := <-t.events
				switch event.eventType {
				case transactionEvent:
					if !t.breakdownMetrics.recordTransaction(event.tx.TransactionData) {
						if !breakdownMetricsLimitWarningLogged && cfg.logger != nil {
							cfg.logger.Warningf("%s", breakdownMetricsLimitWarning)
							breakdownMetricsLimitWarningLogged = true
						}
					}
					// Drop unsampled transactions when the APM Server is >= 8.0
					drop := t.maybeDropTransaction(
						ctx, event.tx.TransactionData, event.tx.Sampled(),
					)
					if !drop {
						modelWriter.writeTransaction(event.tx.Transaction, event.tx.TransactionData)
					}
				case spanEvent:
					modelWriter.writeSpan(event.span.Span, event.span.SpanData)
				case errorEvent:
					modelWriter.writeError(event.err)
				}
			}
			// Nothing in flight and nothing buffered: the flush is
			// already complete, so signal the caller right away.
			if !requestActive && buffer.Len() == 0 && metricsBuffer.Len() == 0 {
				flushed <- struct{}{}
				continue
			}
			// Otherwise close the request; flushed is signalled when the
			// request result arrives.
			closeRequest = true
		case req = <-iochanReader.C:
			// A read request from the transport; answered at the bottom
			// of the loop once data is available.
		case err := <-requestResult:
			if err != nil {
				stats.Errors.SendStream++
				gracePeriod = nextGracePeriod(gracePeriod)
				if cfg.logger != nil {
					logf := cfg.logger.Debugf
					if err, ok := err.(*transport.HTTPError); ok && err.Response.StatusCode == 404 {
						// 404 typically means the server is too old, meaning
						// the error is due to a misconfigured environment.
						logf = cfg.logger.Errorf
					}
					logf("request failed: %s (next request in ~%s)", err, gracePeriod)
				}
			} else {
				gracePeriod = -1 // Reset grace period after success.
				stats.TransactionsSent += requestBufTransactions
				stats.SpansSent += requestBufSpans
				stats.ErrorsSent += requestBufErrors
				if cfg.logger != nil {
					// s returns the plural suffix for a count.
					s := func(n uint64) string {
						if n != 1 {
							return "s"
						}
						return ""
					}
					cfg.logger.Debugf(
						"sent request with %d transaction%s, %d span%s, %d error%s, %d metricset%s",
						requestBufTransactions, s(requestBufTransactions),
						requestBufSpans, s(requestBufSpans),
						requestBufErrors, s(requestBufErrors),
						requestBufMetricsets, s(requestBufMetricsets),
					)
				}
			}
			// Publish the stats before notifying any waiters below.
			if !stats.isZero() {
				t.stats.accumulate(stats)
				stats = TracerStats{}
			}
			// Notify a SendMetrics caller only if this request carried metricsets.
			if sentMetrics != nil && requestBufMetricsets > 0 {
				sentMetrics <- struct{}{}
				sentMetrics = nil
			}
			if flushed != nil {
				flushed <- struct{}{}
				flushed = nil
			}
			if req.Buf != nil {
				// req will be canceled by CloseRead below.
				req.Buf = nil
			}
			// Retire the reader used by the finished request and reset all
			// per-request state, ready for the next request.
			iochanReader.CloseRead(io.EOF)
			iochanReader = iochan.NewReader()
			flushRequest = false
			closeRequest = false
			requestActive = false
			requestBytesRead = 0
			requestBuf.Reset()
			requestBufTransactions = 0
			requestBufSpans = 0
			requestBufErrors = 0
			requestBufMetricsets = 0
			if requestTimerActive {
				if !requestTimer.Stop() {
					<-requestTimer.C
				}
				requestTimerActive = false
			}
		}

		// Merge any locally accumulated stats into t.stats.
		if !stats.isZero() {
			t.stats.accumulate(stats)
			stats = TracerStats{}
		}

		if gatherMetrics {
			gatheringMetrics = true
			metrics.disabled = cfg.disabledMetrics
			t.gatherMetrics(ctx, cfg.metricsGatherers, &metrics, cfg.logger, gatheredMetrics)
			if cfg.logger != nil {
				cfg.logger.Debugf("gathering metrics")
			}
		}

		if !requestActive {
			// No request in flight: start one only if there is data to send.
			if buffer.Len() == 0 && metricsBuffer.Len() == 0 {
				continue
			}
			// Ask the sender goroutine to start a new stream request.
			sendStreamRequest <- gracePeriod
			if metadata == nil {
				metadata = t.jsonRequestMetadata()
			}
			// Every request body starts with the metadata object.
			zlibWriter.Reset(&requestBuf)
			zlibWriter.Write(metadata)
			zlibFlushed = false
			zlibClosed = false
			requestActive = true
			requestTimer.Reset(cfg.requestDuration)
			requestTimerActive = true
		}

		if !closeRequest || !zlibClosed {
			// Copy buffered blocks into the compressed request body until
			// the request reaches cfg.requestSize or the buffers are empty.
			// Metrics blocks are written ahead of other events.
			for requestBytesRead+requestBuf.Len() < cfg.requestSize {
				if metricsBuffer.Len() > 0 {
					if _, _, err := metricsBuffer.WriteBlockTo(zlibWriter); err == nil {
						requestBufMetricsets++
						zlibWriter.Write([]byte("\n"))
						zlibFlushed = false
						if sentMetrics != nil {
							// SendMetrics was called: close the request
							// off so we can inform the user when the
							// metrics have been processed.
							closeRequest = true
						}
					}
					continue
				}
				if buffer.Len() == 0 {
					break
				}
				if h, _, err := buffer.WriteBlockTo(zlibWriter); err == nil {
					switch h.Tag {
					case transactionBlockTag:
						requestBufTransactions++
					case spanBlockTag:
						requestBufSpans++
					case errorBlockTag:
						requestBufErrors++
					}
					zlibWriter.Write([]byte("\n"))
					zlibFlushed = false
				}
			}
			if !closeRequest {
				// Close the request once it has reached the size limit.
				closeRequest = requestBytesRead+requestBuf.Len() >= cfg.requestSize
			}
		}
		if closeRequest {
			// Finish the zlib stream, exactly once per request.
			if !zlibClosed {
				zlibWriter.Close()
				zlibClosed = true
			}
		} else if flushRequest && !zlibFlushed {
			// Flush so that the data written so far becomes available
			// in requestBuf without closing the stream.
			zlibWriter.Flush()
			flushRequest = false
			zlibFlushed = true
		}

		// Answer the pending read, if there is one and there is data for it.
		if req.Buf == nil || requestBuf.Len() == 0 {
			continue
		}
		const zlibHeaderLen = 2
		// Only respond once more than the zlib header's worth of bytes
		// has been produced for this request.
		if requestBytesRead+requestBuf.Len() > zlibHeaderLen {
			n, err := requestBuf.Read(req.Buf)
			if closeRequest && err == nil && requestBuf.Len() == 0 {
				// The request is closed and fully consumed: report EOF
				// along with the final bytes.
				err = io.EOF
			}
			req.Respond(n, err)
			req.Buf = nil
			if n > 0 {
				requestBytesRead += n
			}
		}
	}
}
  1319  
  1320  // jsonRequestMetadata returns a JSON-encoded metadata object that features
  1321  // at the head of every request body. This is called exactly once, when the
  1322  // first request is made.
  1323  func (t *Tracer) jsonRequestMetadata() []byte {
  1324  	var json fastjson.Writer
  1325  	json.RawString(`{"metadata":`)
  1326  	t.encodeRequestMetadata(&json)
  1327  	json.RawString("}\n")
  1328  	return json.Bytes()
  1329  }
  1330  
  1331  // metadataReader returns an io.Reader that holds the JSON-encoded metadata,
  1332  // suitable for including in a profile request.
  1333  func (t *Tracer) metadataReader() io.Reader {
  1334  	var metadata fastjson.Writer
  1335  	t.encodeRequestMetadata(&metadata)
  1336  	return bytes.NewReader(metadata.Bytes())
  1337  }
  1338  
  1339  func (t *Tracer) encodeRequestMetadata(json *fastjson.Writer) {
  1340  	json.RawString(`{"system":`)
  1341  	t.system.MarshalFastJSON(json)
  1342  	json.RawString(`,"process":`)
  1343  	t.process.MarshalFastJSON(json)
  1344  	json.RawString(`,"service":`)
  1345  	t.service.MarshalFastJSON(json)
  1346  	if cloud := getCloudMetadata(); cloud != nil {
  1347  		json.RawString(`,"cloud":`)
  1348  		cloud.MarshalFastJSON(json)
  1349  	}
  1350  	if len(t.globalLabels) > 0 {
  1351  		json.RawString(`,"labels":`)
  1352  		t.globalLabels.MarshalFastJSON(json)
  1353  	}
  1354  	json.RawByte('}')
  1355  }
  1356  
  1357  // gatherMetrics gathers metrics from each of the registered
  1358  // metrics gatherers. Once all gatherers have returned, a value
  1359  // will be sent on the "gathered" channel.
  1360  func (t *Tracer) gatherMetrics(ctx context.Context, gatherers []MetricsGatherer, m *Metrics, l Logger, gathered chan<- struct{}) {
  1361  	timestamp := model.Time(time.Now().UTC())
  1362  	var group sync.WaitGroup
  1363  	for _, g := range gatherers {
  1364  		group.Add(1)
  1365  		go func(g MetricsGatherer) {
  1366  			defer group.Done()
  1367  			gatherMetrics(ctx, g, m, l)
  1368  		}(g)
  1369  	}
  1370  	go func() {
  1371  		group.Wait()
  1372  		for _, m := range m.transactionGroupMetrics {
  1373  			m.Timestamp = timestamp
  1374  		}
  1375  		for _, m := range m.metrics {
  1376  			m.Timestamp = timestamp
  1377  		}
  1378  		gathered <- struct{}{}
  1379  	}()
  1380  }
  1381  
  1382  // maybeDropTransaction may drop a transaction, for example when the transaction
  1383  // is non-sampled and the target server version is 8.0 or greater.
  1384  // maybeDropTransaction returns true if the transaction is dropped, false otherwise.
  1385  func (t *Tracer) maybeDropTransaction(ctx context.Context, td *TransactionData, sampled bool) bool {
  1386  	if sampled || t.versionGetter == nil {
  1387  		return false
  1388  	}
  1389  
  1390  	v := t.versionGetter.MajorServerVersion(ctx, false)
  1391  	dropUnsampled := v >= 8
  1392  	if dropUnsampled {
  1393  		td.reset(t)
  1394  	}
  1395  	return dropUnsampled
  1396  }
  1397  
  1398  // maybeRefreshServerVersion refreshes the remote APM Server version if the version
  1399  // has been marked as stale.
  1400  func (t *Tracer) maybeRefreshServerVersion(ctx context.Context, deadline time.Duration) {
  1401  	if t.versionGetter == nil {
  1402  		return
  1403  	}
  1404  
  1405  	// Fast path, when the version has been cached, there's nothing to do.
  1406  	if v := t.versionGetter.MajorServerVersion(ctx, false); v > 0 {
  1407  		return
  1408  	}
  1409  
  1410  	// If there isn't a cached version, try to refresh the version.
  1411  	if deadline > 0 {
  1412  		var cancel context.CancelFunc
  1413  		ctx, cancel = context.WithTimeout(ctx, deadline)
  1414  		defer cancel()
  1415  	}
  1416  	_ = t.versionGetter.MajorServerVersion(ctx, true)
  1417  	return
  1418  }
  1419  
// tracerEventType identifies which kind of payload a tracerEvent carries.
type tracerEventType int

const (
	// transactionEvent marks a tracerEvent whose tx field is set.
	transactionEvent tracerEventType = iota
	// spanEvent marks a tracerEvent whose span field is set.
	spanEvent
	// errorEvent marks a tracerEvent whose err field is set.
	errorEvent
)
  1427  
// tracerEvent is the unit sent to the tracer loop on the events channel.
// Exactly one of err, tx, or span is meaningful, as selected by eventType.
type tracerEvent struct {
	// eventType selects which of the fields below is set.
	eventType tracerEventType

	// err is set only if eventType == errorEvent.
	err *ErrorData

	// tx is set only if eventType == transactionEvent.
	tx struct {
		*Transaction
		// Transaction.TransactionData is nil at the
		// point tracerEvent is created (to signify
		// that the transaction is ended), so we pass
		// it alongside.
		*TransactionData
	}

	// span is set only if eventType == spanEvent.
	span struct {
		*Span
		// Span.SpanData is nil at the point tracerEvent
		// is created (to signify that the span is ended),
		// so we pass it alongside.
		*SpanData
	}
}
  1453  
// majorVersionGetter is implemented by types that can report the major
// version of the remote APM Server.
type majorVersionGetter interface {
	// MajorServerVersion returns the APM Server's major version. When
	// refreshStale is true, it will request the remote APM Server's version
	// from `/`, otherwise it will return the cached version. If the returned
	// version is 0, the cache is stale.
	MajorServerVersion(ctx context.Context, refreshStale bool) uint32
}
  1461  
  1462  func parseGlobalLabels() model.StringMap {
  1463  	var labels model.StringMap
  1464  	for _, kv := range configutil.ParseListEnv(envGlobalLabels, ",", nil) {
  1465  		i := strings.IndexRune(kv, '=')
  1466  		if i > 0 {
  1467  			k, v := strings.TrimSpace(kv[:i]), strings.TrimSpace(kv[i+1:])
  1468  			labels = append(labels, model.StringMapItem{
  1469  				Key:   cleanLabelKey(k),
  1470  				Value: truncateString(v),
  1471  			})
  1472  		}
  1473  	}
  1474  	return labels
  1475  }