github.com/waldiirawan/apm-agent-go/v2@v2.2.2/tracer.go (about) 1 // Licensed to Elasticsearch B.V. under one or more contributor 2 // license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright 4 // ownership. Elasticsearch B.V. licenses this file to you under 5 // the Apache License, Version 2.0 (the "License"); you may 6 // not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, 12 // software distributed under the License is distributed on an 13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 // KIND, either express or implied. See the License for the 15 // specific language governing permissions and limitations 16 // under the License. 17 18 package apm // import "github.com/waldiirawan/apm-agent-go/v2" 19 20 import ( 21 "bytes" 22 "compress/zlib" 23 "context" 24 "io" 25 "log" 26 "math/rand" 27 "strings" 28 "sync" 29 "sync/atomic" 30 "time" 31 32 "github.com/waldiirawan/apm-agent-go/v2/apmconfig" 33 "github.com/waldiirawan/apm-agent-go/v2/internal/apmlog" 34 "github.com/waldiirawan/apm-agent-go/v2/internal/configutil" 35 "github.com/waldiirawan/apm-agent-go/v2/internal/iochan" 36 "github.com/waldiirawan/apm-agent-go/v2/internal/ringbuffer" 37 "github.com/waldiirawan/apm-agent-go/v2/internal/wildcard" 38 "github.com/waldiirawan/apm-agent-go/v2/model" 39 "github.com/waldiirawan/apm-agent-go/v2/transport" 40 "go.elastic.co/fastjson" 41 ) 42 43 const ( 44 gracePeriodJitter = 0.1 // +/- 10% 45 tracerEventChannelCap = 1000 46 ) 47 48 var ( 49 tracerMu sync.RWMutex 50 defaultTracer *Tracer 51 ) 52 53 // DefaultTracer returns the default global Tracer, set the first time the 54 // function is called, or after calling SetDefaultTracer(nil). 
55 // 56 // The default tracer is configured via environment variables, and will always 57 // be non-nil. If any of the environment variables are invalid, the 58 // corresponding errors will be logged to stderr and the default values will be 59 // used instead. 60 func DefaultTracer() *Tracer { 61 tracerMu.RLock() 62 if defaultTracer != nil { 63 tracer := defaultTracer 64 tracerMu.RUnlock() 65 return tracer 66 } 67 tracerMu.RUnlock() 68 69 tracerMu.Lock() 70 defer tracerMu.Unlock() 71 if defaultTracer != nil { 72 return defaultTracer 73 } 74 75 var opts TracerOptions 76 opts.initDefaults(true) 77 defaultTracer = newTracer(opts) 78 return defaultTracer 79 } 80 81 // SetDefaultTracer sets the tracer returned by DefaultTracer. 82 // 83 // If a default tracer has already been initialized, it is closed. 84 // Any queued events are not flushed; it is the responsibility of the 85 // caller to call the default tracer's Flush method first, if needed. 86 // 87 // Calling SetDefaultTracer(nil) will clear the default tracer, 88 // causing DefaultTracer to initialize a new default tracer. 89 func SetDefaultTracer(t *Tracer) { 90 tracerMu.Lock() 91 defer tracerMu.Unlock() 92 93 if defaultTracer != nil { 94 defaultTracer.Close() 95 } 96 defaultTracer = t 97 } 98 99 // TracerOptions holds initial tracer options, for passing to NewTracerOptions. 100 type TracerOptions struct { 101 // ServiceName holds the service name. 102 // 103 // If ServiceName is empty, the service name will be defined using the 104 // ELASTIC_APM_SERVICE_NAME environment variable, or if that is not set, 105 // the executable name. 106 ServiceName string 107 108 // ServiceVersion holds the service version. 109 // 110 // If ServiceVersion is empty, the service version will be defined using 111 // the ELASTIC_APM_SERVICE_VERSION environment variable. 112 ServiceVersion string 113 114 // ServiceEnvironment holds the service environment. 
115 // 116 // If ServiceEnvironment is empty, the service environment will be defined 117 // using the ELASTIC_APM_ENVIRONMENT environment variable. 118 ServiceEnvironment string 119 120 // Transport holds the transport to use for sending events. 121 // 122 // If Transport is nil, a new HTTP transport will be created from environment 123 // variables. 124 // 125 // If Transport implements apmconfig.Watcher, the tracer will begin watching 126 // for remote changes immediately. This behaviour can be disabled by setting 127 // the environment variable ELASTIC_APM_CENTRAL_CONFIG=false. 128 // If Transport implements the interface below, the tracer will query the 129 // APM Server "/" endpoint to obtain the remote major version. Implementers 130 // of this interface must cache the remote server version and only refresh 131 // on subsequent calls that have `refreshStale` set to true. Implementations 132 // must be concurrently safe. 133 // MajorServerVersion(ctx context.Context, refreshStale bool) uint32 134 Transport transport.Transport 135 136 requestDuration time.Duration 137 metricsInterval time.Duration 138 maxSpans int 139 requestSize int 140 bufferSize int 141 metricsBufferSize int 142 sampler Sampler 143 sanitizedFieldNames wildcard.Matchers 144 disabledMetrics wildcard.Matchers 145 ignoreTransactionURLs wildcard.Matchers 146 continuationStrategy string 147 captureHeaders bool 148 captureBody CaptureBodyMode 149 spanStackTraceMinDuration time.Duration 150 stackTraceLimit int 151 active bool 152 recording bool 153 configWatcher apmconfig.Watcher 154 breakdownMetrics bool 155 propagateLegacyHeader bool 156 profileSender profileSender 157 versionGetter majorVersionGetter 158 cpuProfileInterval time.Duration 159 cpuProfileDuration time.Duration 160 heapProfileInterval time.Duration 161 exitSpanMinDuration time.Duration 162 compressionOptions compressionOptions 163 globalLabels model.StringMap 164 } 165 166 // initDefaults updates opts with default values. 
167 func (opts *TracerOptions) initDefaults(continueOnError bool) error { 168 var errs []error 169 failed := func(err error) bool { 170 if err == nil { 171 return false 172 } 173 errs = append(errs, err) 174 return true 175 } 176 177 requestDuration, err := initialRequestDuration() 178 if failed(err) { 179 requestDuration = defaultAPIRequestTime 180 } 181 182 metricsInterval, err := initialMetricsInterval() 183 if err != nil { 184 metricsInterval = defaultMetricsInterval 185 errs = append(errs, err) 186 } 187 188 requestSize, err := initialAPIRequestSize() 189 if err != nil { 190 requestSize = int(defaultAPIRequestSize) 191 errs = append(errs, err) 192 } 193 194 bufferSize, err := initialAPIBufferSize() 195 if err != nil { 196 bufferSize = int(defaultAPIBufferSize) 197 errs = append(errs, err) 198 } 199 200 metricsBufferSize, err := initialMetricsBufferSize() 201 if err != nil { 202 metricsBufferSize = int(defaultMetricsBufferSize) 203 errs = append(errs, err) 204 } 205 206 maxSpans, err := initialMaxSpans() 207 if failed(err) { 208 maxSpans = defaultMaxSpans 209 } 210 211 spanCompressionEnabled, err := initialSpanCompressionEnabled() 212 if failed(err) { 213 spanCompressionEnabled = defaultSpanCompressionEnabled 214 } 215 216 spanCompressionExactMatchMaxDuration, err := initialSpanCompressionExactMatchMaxDuration() 217 if failed(err) { 218 spanCompressionExactMatchMaxDuration = defaultSpanCompressionExactMatchMaxDuration 219 } 220 221 spanCompressionSameKindMaxDuration, err := initialSpanCompressionSameKindMaxDuration() 222 if failed(err) { 223 spanCompressionSameKindMaxDuration = defaultSpanCompressionSameKindMaxDuration 224 } 225 226 sampler, err := initialSampler() 227 if failed(err) { 228 sampler = nil 229 } 230 231 captureHeaders, err := initialCaptureHeaders() 232 if failed(err) { 233 captureHeaders = defaultCaptureHeaders 234 } 235 236 captureBody, err := initialCaptureBody() 237 if failed(err) { 238 captureBody = CaptureBodyOff 239 } 240 241 
spanStackTraceMinDuration, err := initialSpanStackTraceMinDuration() 242 if failed(err) { 243 spanStackTraceMinDuration = defaultSpanStackTraceMinDuration 244 } 245 246 stackTraceLimit, err := initialStackTraceLimit() 247 if failed(err) { 248 stackTraceLimit = defaultStackTraceLimit 249 } 250 251 active, err := initialActive() 252 if failed(err) { 253 active = true 254 } 255 256 recording, err := initialRecording() 257 if failed(err) { 258 recording = true 259 } 260 261 centralConfigEnabled, err := initialCentralConfigEnabled() 262 if failed(err) { 263 centralConfigEnabled = true 264 } 265 266 breakdownMetricsEnabled, err := initialBreakdownMetricsEnabled() 267 if failed(err) { 268 breakdownMetricsEnabled = true 269 } 270 271 propagateLegacyHeader, err := initialUseElasticTraceparentHeader() 272 if failed(err) { 273 propagateLegacyHeader = true 274 } 275 276 cpuProfileInterval, cpuProfileDuration, err := initialCPUProfileIntervalDuration() 277 if failed(err) { 278 cpuProfileInterval = 0 279 cpuProfileDuration = 0 280 } 281 heapProfileInterval, err := initialHeapProfileInterval() 282 if failed(err) { 283 heapProfileInterval = 0 284 } 285 286 exitSpanMinDuration, err := initialExitSpanMinDuration() 287 if failed(err) { 288 exitSpanMinDuration = defaultExitSpanMinDuration 289 } 290 291 continuationStrategy, err := initContinuationStrategy() 292 if failed(err) { 293 continuationStrategy = defaultContinuationStrategy 294 } 295 296 if opts.ServiceName != "" { 297 err := validateServiceName(opts.ServiceName) 298 if failed(err) { 299 opts.ServiceName = "" 300 } 301 } 302 303 serviceName, serviceVersion, serviceEnvironment := initialService() 304 if opts.ServiceName == "" { 305 opts.ServiceName = serviceName 306 } 307 if opts.ServiceVersion == "" { 308 opts.ServiceVersion = serviceVersion 309 } 310 if opts.ServiceEnvironment == "" { 311 opts.ServiceEnvironment = serviceEnvironment 312 } 313 314 if opts.Transport == nil { 315 initialTransport, err := 
initialTransport(opts.ServiceName, opts.ServiceVersion) 316 if failed(err) { 317 opts.Transport = transport.NewDiscardTransport(err) 318 } else { 319 opts.Transport = initialTransport 320 } 321 } 322 323 if len(errs) != 0 && !continueOnError { 324 return errs[0] 325 } 326 for _, err := range errs { 327 log.Printf("[apm]: %s", err) 328 } 329 330 opts.globalLabels = parseGlobalLabels() 331 opts.requestDuration = requestDuration 332 opts.metricsInterval = metricsInterval 333 opts.requestSize = requestSize 334 opts.bufferSize = bufferSize 335 opts.metricsBufferSize = metricsBufferSize 336 opts.maxSpans = maxSpans 337 opts.compressionOptions = compressionOptions{ 338 enabled: spanCompressionEnabled, 339 exactMatchMaxDuration: spanCompressionExactMatchMaxDuration, 340 sameKindMaxDuration: spanCompressionSameKindMaxDuration, 341 } 342 opts.sampler = sampler 343 opts.sanitizedFieldNames = initialSanitizedFieldNames() 344 opts.disabledMetrics = initialDisabledMetrics() 345 opts.ignoreTransactionURLs = initialIgnoreTransactionURLs() 346 opts.breakdownMetrics = breakdownMetricsEnabled 347 opts.captureHeaders = captureHeaders 348 opts.captureBody = captureBody 349 opts.spanStackTraceMinDuration = spanStackTraceMinDuration 350 opts.stackTraceLimit = stackTraceLimit 351 opts.active = active 352 opts.recording = recording 353 opts.propagateLegacyHeader = propagateLegacyHeader 354 opts.exitSpanMinDuration = exitSpanMinDuration 355 opts.continuationStrategy = continuationStrategy 356 if centralConfigEnabled { 357 if cw, ok := opts.Transport.(apmconfig.Watcher); ok { 358 opts.configWatcher = cw 359 } 360 } 361 if ps, ok := opts.Transport.(profileSender); ok { 362 opts.profileSender = ps 363 opts.cpuProfileInterval = cpuProfileInterval 364 opts.cpuProfileDuration = cpuProfileDuration 365 opts.heapProfileInterval = heapProfileInterval 366 } 367 if vg, ok := opts.Transport.(majorVersionGetter); ok { 368 opts.versionGetter = vg 369 } 370 return nil 371 } 372 373 type compressionOptions 
struct { 374 enabled bool 375 exactMatchMaxDuration time.Duration 376 sameKindMaxDuration time.Duration 377 } 378 379 // Tracer manages the sampling and sending of transactions to 380 // Elastic APM. 381 // 382 // Transactions are buffered until they are flushed (forcibly 383 // with a Flush call, or when the flush timer expires), or when 384 // the maximum transaction queue size is reached. Failure to 385 // send will be periodically retried. Once the queue limit has 386 // been reached, new transactions will replace older ones in 387 // the queue. 388 // 389 // Errors are sent as soon as possible, but will buffered and 390 // later sent in bulk if the tracer is busy, or otherwise cannot 391 // send to the server, e.g. due to network failure. There is 392 // a limit to the number of errors that will be buffered, and 393 // once that limit has been reached, new errors will be dropped 394 // until the queue is drained. 395 type Tracer struct { 396 transport transport.Transport 397 service model.Service 398 process *model.Process 399 system *model.System 400 active int32 401 bufferSize int 402 metricsBufferSize int 403 closing chan struct{} 404 closed chan struct{} 405 forceFlush chan chan<- struct{} 406 forceSendMetrics chan chan<- struct{} 407 configCommands chan tracerConfigCommand 408 configWatcher chan apmconfig.Watcher 409 events chan tracerEvent 410 breakdownMetrics *breakdownMetrics 411 profileSender profileSender 412 versionGetter majorVersionGetter 413 globalLabels model.StringMap 414 415 // stats is heap-allocated to ensure correct alignment for atomic access. 416 stats *TracerStats 417 418 // instrumentationConfig_ must only be accessed and mutated 419 // using Tracer.instrumentationConfig() and Tracer.setInstrumentationConfig(). 
420 instrumentationConfigInternal *instrumentationConfig 421 422 errorDataPool sync.Pool 423 spanDataPool sync.Pool 424 transactionDataPool sync.Pool 425 } 426 427 // NewTracer returns a new Tracer, using the default transport, 428 // and with the specified service name and version if specified. 429 // This is equivalent to calling NewTracerOptions with a 430 // TracerOptions having ServiceName and ServiceVersion set to 431 // the provided arguments. 432 func NewTracer(serviceName, serviceVersion string) (*Tracer, error) { 433 return NewTracerOptions(TracerOptions{ 434 ServiceName: serviceName, 435 ServiceVersion: serviceVersion, 436 }) 437 } 438 439 // NewTracerOptions returns a new Tracer using the provided options. 440 // See TracerOptions for details on the options, and their default 441 // values. 442 func NewTracerOptions(opts TracerOptions) (*Tracer, error) { 443 if err := opts.initDefaults(false); err != nil { 444 return nil, err 445 } 446 return newTracer(opts), nil 447 } 448 449 func newTracer(opts TracerOptions) *Tracer { 450 t := &Tracer{ 451 transport: opts.Transport, 452 service: makeService( 453 opts.ServiceName, 454 opts.ServiceVersion, 455 opts.ServiceEnvironment, 456 ), 457 process: ¤tProcess, 458 system: &localSystem, 459 closing: make(chan struct{}), 460 closed: make(chan struct{}), 461 forceFlush: make(chan chan<- struct{}), 462 forceSendMetrics: make(chan chan<- struct{}), 463 configCommands: make(chan tracerConfigCommand), 464 configWatcher: make(chan apmconfig.Watcher), 465 events: make(chan tracerEvent, tracerEventChannelCap), 466 active: 1, 467 breakdownMetrics: newBreakdownMetrics(), 468 stats: &TracerStats{}, 469 bufferSize: opts.bufferSize, 470 metricsBufferSize: opts.metricsBufferSize, 471 profileSender: opts.profileSender, 472 versionGetter: opts.versionGetter, 473 instrumentationConfigInternal: &instrumentationConfig{ 474 local: make(map[string]func(*instrumentationConfigValues)), 475 }, 476 globalLabels: opts.globalLabels, 477 } 478 
t.breakdownMetrics.enabled = opts.breakdownMetrics 479 // Initialise local transaction config. 480 t.setLocalInstrumentationConfig(envRecording, func(cfg *instrumentationConfigValues) { 481 cfg.recording = opts.recording 482 }) 483 t.setLocalInstrumentationConfig(envCaptureBody, func(cfg *instrumentationConfigValues) { 484 cfg.captureBody = opts.captureBody 485 }) 486 t.setLocalInstrumentationConfig(envCaptureHeaders, func(cfg *instrumentationConfigValues) { 487 cfg.captureHeaders = opts.captureHeaders 488 }) 489 t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) { 490 cfg.maxSpans = opts.maxSpans 491 }) 492 t.setLocalInstrumentationConfig(envSpanCompressionEnabled, func(cfg *instrumentationConfigValues) { 493 cfg.compressionOptions.enabled = opts.compressionOptions.enabled 494 }) 495 t.setLocalInstrumentationConfig(envSpanCompressionExactMatchMaxDuration, func(cfg *instrumentationConfigValues) { 496 cfg.compressionOptions.exactMatchMaxDuration = opts.compressionOptions.exactMatchMaxDuration 497 }) 498 t.setLocalInstrumentationConfig(envSpanCompressionSameKindMaxDuration, func(cfg *instrumentationConfigValues) { 499 cfg.compressionOptions.sameKindMaxDuration = opts.compressionOptions.sameKindMaxDuration 500 }) 501 t.setLocalInstrumentationConfig(envTransactionSampleRate, func(cfg *instrumentationConfigValues) { 502 cfg.sampler = opts.sampler 503 }) 504 t.setLocalInstrumentationConfig(envSpanStackTraceMinDuration, func(cfg *instrumentationConfigValues) { 505 cfg.spanStackTraceMinDuration = opts.spanStackTraceMinDuration 506 }) 507 t.setLocalInstrumentationConfig(envStackTraceLimit, func(cfg *instrumentationConfigValues) { 508 cfg.stackTraceLimit = opts.stackTraceLimit 509 }) 510 t.setLocalInstrumentationConfig(envUseElasticTraceparentHeader, func(cfg *instrumentationConfigValues) { 511 cfg.propagateLegacyHeader = opts.propagateLegacyHeader 512 }) 513 t.setLocalInstrumentationConfig(envSanitizeFieldNames, func(cfg 
*instrumentationConfigValues) { 514 cfg.sanitizedFieldNames = opts.sanitizedFieldNames 515 }) 516 t.setLocalInstrumentationConfig(envIgnoreURLs, func(cfg *instrumentationConfigValues) { 517 cfg.ignoreTransactionURLs = opts.ignoreTransactionURLs 518 }) 519 t.setLocalInstrumentationConfig(envExitSpanMinDuration, func(cfg *instrumentationConfigValues) { 520 cfg.exitSpanMinDuration = opts.exitSpanMinDuration 521 }) 522 t.setLocalInstrumentationConfig(envContinuationStrategy, func(cfg *instrumentationConfigValues) { 523 cfg.continuationStrategy = opts.continuationStrategy 524 }) 525 if logger := apmlog.DefaultLogger(); logger != nil { 526 defaultLogLevel := logger.Level() 527 t.setLocalInstrumentationConfig(apmlog.EnvLogLevel, func(cfg *instrumentationConfigValues) { 528 // Revert to the original, local, log level when 529 // the centrally defined log level is removed. 530 logger.SetLevel(defaultLogLevel) 531 }) 532 } 533 534 if !opts.active { 535 t.active = 0 536 close(t.closed) 537 return t 538 } 539 540 go t.loop() 541 t.configCommands <- func(cfg *tracerConfig) { 542 cfg.recording = opts.recording 543 cfg.cpuProfileInterval = opts.cpuProfileInterval 544 cfg.cpuProfileDuration = opts.cpuProfileDuration 545 cfg.heapProfileInterval = opts.heapProfileInterval 546 cfg.metricsInterval = opts.metricsInterval 547 cfg.requestDuration = opts.requestDuration 548 cfg.requestSize = opts.requestSize 549 cfg.disabledMetrics = opts.disabledMetrics 550 cfg.metricsGatherers = []MetricsGatherer{newBuiltinMetricsGatherer(t)} 551 if logger := apmlog.DefaultLogger(); logger != nil { 552 cfg.logger = logger 553 } 554 } 555 if opts.configWatcher != nil { 556 t.configWatcher <- opts.configWatcher 557 } 558 return t 559 } 560 561 // tracerConfig holds the tracer's runtime configuration, which may be modified 562 // by sending a tracerConfigCommand to the tracer's configCommands channel. 
563 type tracerConfig struct { 564 recording bool 565 requestSize int 566 requestDuration time.Duration 567 metricsInterval time.Duration 568 logger Logger 569 metricsGatherers []MetricsGatherer 570 disabledMetrics wildcard.Matchers 571 cpuProfileDuration time.Duration 572 cpuProfileInterval time.Duration 573 heapProfileInterval time.Duration 574 } 575 576 type tracerConfigCommand func(*tracerConfig) 577 578 // Close closes the Tracer, preventing transactions from being 579 // sent to the APM server. 580 func (t *Tracer) Close() { 581 select { 582 case <-t.closing: 583 default: 584 close(t.closing) 585 } 586 <-t.closed 587 } 588 589 // Flush waits for the Tracer to flush any transactions and errors it currently 590 // has queued to the APM server, the tracer is stopped, or the abort channel 591 // is signaled. 592 func (t *Tracer) Flush(abort <-chan struct{}) { 593 flushed := make(chan struct{}, 1) 594 select { 595 case t.forceFlush <- flushed: 596 select { 597 case <-abort: 598 case <-flushed: 599 case <-t.closed: 600 } 601 case <-t.closed: 602 } 603 } 604 605 // Recording reports whether the tracer is recording events. Instrumentation 606 // may use this to avoid creating transactions, spans, and metrics when the 607 // tracer is configured to not record. 608 // 609 // Recording will also return false if the tracer is inactive. 610 func (t *Tracer) Recording() bool { 611 return t.instrumentationConfig().recording && t.Active() 612 } 613 614 // Active reports whether the tracer is active. If the tracer is inactive, 615 // no transactions or errors will be sent to the Elastic APM server. 616 func (t *Tracer) Active() bool { 617 return atomic.LoadInt32(&t.active) == 1 618 } 619 620 // ShouldPropagateLegacyHeader reports whether instrumentation should 621 // propagate the legacy "Elastic-Apm-Traceparent" header in addition to 622 // the standard W3C "traceparent" header. 
623 // 624 // This method will be removed in a future major version when we remove 625 // support for propagating the legacy header. 626 func (t *Tracer) ShouldPropagateLegacyHeader() bool { 627 return t.instrumentationConfig().propagateLegacyHeader 628 } 629 630 // SetRequestDuration sets the maximum amount of time to keep a request open 631 // to the APM server for streaming data before closing the stream and starting 632 // a new request. 633 func (t *Tracer) SetRequestDuration(d time.Duration) { 634 t.sendConfigCommand(func(cfg *tracerConfig) { 635 cfg.requestDuration = d 636 }) 637 } 638 639 // SetMetricsInterval sets the metrics interval -- the amount of time in 640 // between metrics samples being gathered. 641 func (t *Tracer) SetMetricsInterval(d time.Duration) { 642 t.sendConfigCommand(func(cfg *tracerConfig) { 643 cfg.metricsInterval = d 644 }) 645 } 646 647 // SetLogger sets the Logger to be used for logging the operation of 648 // the tracer. 649 // 650 // The tracer is initialized with a default logger configured with the 651 // environment variables ELASTIC_APM_LOG_FILE and ELASTIC_APM_LOG_LEVEL. 652 // Calling SetLogger will replace the default logger. 653 func (t *Tracer) SetLogger(logger Logger) { 654 t.sendConfigCommand(func(cfg *tracerConfig) { 655 cfg.logger = logger 656 }) 657 } 658 659 // SetSanitizedFieldNames sets the wildcard patterns that will be used to 660 // match cookie and form field names for sanitization. Fields matching any 661 // of the the supplied patterns will have their values redacted. If 662 // SetSanitizedFieldNames is called with no arguments, then no fields 663 // will be redacted. 664 // 665 // Configuration via Kibana takes precedence over local configuration, so 666 // if sanitized_field_names has been configured via Kibana, this call will 667 // not have any effect until/unless that configuration has been removed. 
668 func (t *Tracer) SetSanitizedFieldNames(patterns ...string) error { 669 var matchers wildcard.Matchers 670 if len(patterns) != 0 { 671 matchers = make(wildcard.Matchers, len(patterns)) 672 for i, p := range patterns { 673 matchers[i] = configutil.ParseWildcardPattern(p) 674 } 675 } 676 t.setLocalInstrumentationConfig(envSanitizeFieldNames, func(cfg *instrumentationConfigValues) { 677 cfg.sanitizedFieldNames = matchers 678 }) 679 return nil 680 } 681 682 // SetIgnoreTransactionURLs sets the wildcard patterns that will be used to 683 // ignore transactions with matching URLs. 684 func (t *Tracer) SetIgnoreTransactionURLs(pattern string) error { 685 t.setLocalInstrumentationConfig(envIgnoreURLs, func(cfg *instrumentationConfigValues) { 686 cfg.ignoreTransactionURLs = configutil.ParseWildcardPatterns(pattern) 687 }) 688 return nil 689 } 690 691 // RegisterMetricsGatherer registers g for periodic (or forced) metrics 692 // gathering by t. 693 // 694 // RegisterMetricsGatherer returns a function which will deregister g. 695 // It may safely be called multiple times. 696 func (t *Tracer) RegisterMetricsGatherer(g MetricsGatherer) func() { 697 // Wrap g in a pointer-to-struct, so we can safely compare. 698 wrapped := &struct{ MetricsGatherer }{MetricsGatherer: g} 699 t.sendConfigCommand(func(cfg *tracerConfig) { 700 cfg.metricsGatherers = append(cfg.metricsGatherers, wrapped) 701 }) 702 deregister := func(cfg *tracerConfig) { 703 for i, g := range cfg.metricsGatherers { 704 if g != wrapped { 705 continue 706 } 707 cfg.metricsGatherers = append(cfg.metricsGatherers[:i], cfg.metricsGatherers[i+1:]...) 708 } 709 } 710 var once sync.Once 711 return func() { 712 once.Do(func() { 713 t.sendConfigCommand(deregister) 714 }) 715 } 716 } 717 718 // SetConfigWatcher sets w as the config watcher. 719 // 720 // By default, the tracer will be configured to use the transport for 721 // watching config, if the transport implements apmconfig.Watcher. 
This 722 // can be overridden by calling SetConfigWatcher. 723 // 724 // If w is nil, config watching will be stopped. 725 // 726 // Calling SetConfigWatcher will discard any previously observed remote 727 // config, reverting to local config until a config change from w is 728 // observed. 729 func (t *Tracer) SetConfigWatcher(w apmconfig.Watcher) { 730 select { 731 case t.configWatcher <- w: 732 case <-t.closing: 733 case <-t.closed: 734 } 735 } 736 737 func (t *Tracer) sendConfigCommand(cmd tracerConfigCommand) { 738 select { 739 case t.configCommands <- cmd: 740 case <-t.closing: 741 case <-t.closed: 742 } 743 } 744 745 // SetRecording enables or disables recording of future events. 746 // 747 // SetRecording does not affect in-flight events. 748 func (t *Tracer) SetRecording(r bool) { 749 t.setLocalInstrumentationConfig(envRecording, func(cfg *instrumentationConfigValues) { 750 // Update instrumentation config to disable transactions and errors. 751 cfg.recording = r 752 }) 753 t.sendConfigCommand(func(cfg *tracerConfig) { 754 // Consult t.instrumentationConfig() as local config may not be in effect, 755 // or there may have been a concurrent change to instrumentation config. 756 cfg.recording = t.instrumentationConfig().recording 757 }) 758 } 759 760 // SetSampler sets the sampler the tracer. 761 // 762 // It is valid to pass nil, in which case all transactions will be sampled. 763 // 764 // Configuration via Kibana takes precedence over local configuration, so 765 // if sampling has been configured via Kibana, this call will not have any 766 // effect until/unless that configuration has been removed. 767 func (t *Tracer) SetSampler(s Sampler) { 768 t.setLocalInstrumentationConfig(envTransactionSampleRate, func(cfg *instrumentationConfigValues) { 769 cfg.sampler = s 770 }) 771 } 772 773 // SetMaxSpans sets the maximum number of spans that will be added 774 // to a transaction before dropping spans. 
775 // 776 // Passing in zero will disable all spans, while negative values will 777 // permit an unlimited number of spans. 778 func (t *Tracer) SetMaxSpans(n int) { 779 t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) { 780 cfg.maxSpans = n 781 }) 782 } 783 784 // SetSpanCompressionEnabled enables/disables the span compression feature. 785 func (t *Tracer) SetSpanCompressionEnabled(v bool) { 786 t.setLocalInstrumentationConfig(envSpanCompressionEnabled, func(cfg *instrumentationConfigValues) { 787 cfg.compressionOptions.enabled = v 788 }) 789 } 790 791 // SetSpanCompressionExactMatchMaxDuration sets the maximum duration for a span 792 // to be compressed with `compression_strategy` == `exact_match`. 793 func (t *Tracer) SetSpanCompressionExactMatchMaxDuration(v time.Duration) { 794 t.setLocalInstrumentationConfig(envSpanCompressionExactMatchMaxDuration, func(cfg *instrumentationConfigValues) { 795 cfg.compressionOptions.exactMatchMaxDuration = v 796 }) 797 } 798 799 // SetSpanCompressionSameKindMaxDuration sets the maximum duration for a span 800 // to be compressed with `compression_strategy` == `same_kind`. 801 func (t *Tracer) SetSpanCompressionSameKindMaxDuration(v time.Duration) { 802 t.setLocalInstrumentationConfig(envSpanCompressionSameKindMaxDuration, func(cfg *instrumentationConfigValues) { 803 cfg.compressionOptions.sameKindMaxDuration = v 804 }) 805 } 806 807 // SetSpanStackTraceMinDuration sets the minimum duration for a span after which 808 // we will capture its stack frames. 809 func (t *Tracer) SetSpanStackTraceMinDuration(d time.Duration) { 810 t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) { 811 cfg.spanStackTraceMinDuration = d 812 }) 813 } 814 815 // SetStackTraceLimit sets the the maximum number of stack frames to collect 816 // for each stack trace. If limit is negative, then all frames will be collected. 
817 func (t *Tracer) SetStackTraceLimit(limit int) { 818 t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) { 819 cfg.stackTraceLimit = limit 820 }) 821 } 822 823 // SetCaptureHeaders enables or disables capturing of HTTP headers. 824 func (t *Tracer) SetCaptureHeaders(capture bool) { 825 t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) { 826 cfg.captureHeaders = capture 827 }) 828 } 829 830 // SetCaptureBody sets the HTTP request body capture mode. 831 func (t *Tracer) SetCaptureBody(mode CaptureBodyMode) { 832 t.setLocalInstrumentationConfig(envMaxSpans, func(cfg *instrumentationConfigValues) { 833 cfg.captureBody = mode 834 }) 835 } 836 837 // SetExitSpanMinDuration sets the minimum duration for an exit span to not be 838 // dropped. 839 func (t *Tracer) SetExitSpanMinDuration(v time.Duration) { 840 t.setLocalInstrumentationConfig(envExitSpanMinDuration, func(cfg *instrumentationConfigValues) { 841 cfg.exitSpanMinDuration = v 842 }) 843 } 844 845 // SetContinuationStrategy sets the continuation strategy. 846 func (t *Tracer) SetContinuationStrategy(v string) { 847 t.setLocalInstrumentationConfig(envContinuationStrategy, func(cfg *instrumentationConfigValues) { 848 cfg.continuationStrategy = v 849 }) 850 } 851 852 // SendMetrics forces the tracer to gather and send metrics immediately, 853 // blocking until the metrics have been sent or the abort channel is 854 // signalled. 855 func (t *Tracer) SendMetrics(abort <-chan struct{}) { 856 sent := make(chan struct{}, 1) 857 select { 858 case t.forceSendMetrics <- sent: 859 select { 860 case <-abort: 861 case <-sent: 862 case <-t.closed: 863 } 864 case <-t.closed: 865 } 866 } 867 868 // Stats returns the current TracerStats. This will return the most 869 // recent values even after the tracer has been closed. 
func (t *Tracer) Stats() TracerStats {
	return t.stats.copy()
}

// loop is the tracer's main event loop. It owns all of the state declared
// below and multiplexes, in a single select, over: shutdown, configuration
// commands, config watcher changes, incoming events, timers (request,
// metrics, profiling), forced flushes/metric sends, read requests from the
// transport and the results of completed stream requests.
//
// Events are encoded into ring buffers by modelWriter; whenever there is
// buffered data and no request in flight, a stream request is started and
// the buffered blocks are compressed with zlib into requestBuf, from which
// the transport's read requests are served.
//
// loop returns when t.closing is signalled. On return the deferred calls
// stop the config watcher (if any), stop the version ticker, shut down the
// sender goroutine, clear t.active, close t.closed and cancel the context.
func (t *Tracer) loop() {
	ctx, cancelContext := context.WithCancel(context.Background())
	defer cancelContext()
	defer close(t.closed)
	defer atomic.StoreInt32(&t.active, 0)

	// State of the stream request currently being built/served.
	var req iochan.ReadRequest
	var requestBuf bytes.Buffer
	var metadata []byte
	// A non-positive grace period means the sender does not delay.
	var gracePeriod time.Duration = -1
	var flushed chan<- struct{}
	var requestBufTransactions, requestBufSpans, requestBufErrors, requestBufMetricsets uint64
	// The error is ignored: zlib.BestSpeed is a valid compression level.
	zlibWriter, _ := zlib.NewWriterLevel(&requestBuf, zlib.BestSpeed)
	zlibFlushed := true
	zlibClosed := false
	iochanReader := iochan.NewReader()
	requestBytesRead := 0
	requestActive := false
	closeRequest := false
	flushRequest := false
	requestResult := make(chan error, 1)
	// Create the timer in a stopped, drained state so that it only fires
	// after an explicit Reset.
	requestTimer := time.NewTimer(0)
	requestTimerActive := false
	if !requestTimer.Stop() {
		<-requestTimer.C
	}

	// Run another goroutine to perform the blocking requests,
	// communicating with the tracer loop to obtain stream data.
	sendStreamRequest := make(chan time.Duration)
	done := make(chan struct{})
	defer func() {
		// Closing the channel ends the sender goroutine's range loop;
		// wait for it to exit before loop returns.
		close(sendStreamRequest)
		<-done
	}()
	go func() {
		defer close(done)
		jitterRand := rand.New(rand.NewSource(time.Now().UnixNano()))
		for gracePeriod := range sendStreamRequest {
			if gracePeriod > 0 {
				// Back off (with jitter) before sending, unless the
				// context is cancelled first.
				select {
				case <-time.After(jitterDuration(gracePeriod, jitterRand, gracePeriodJitter)):
				case <-ctx.Done():
				}
			}
			requestResult <- t.transport.SendStream(ctx, iochanReader)
		}
	}()

	refreshServerVersionDeadline := 10 * time.Second
	refreshVersionTicker := time.NewTicker(refreshServerVersionDeadline)
	defer refreshVersionTicker.Stop()
	if t.versionGetter != nil {
		go t.maybeRefreshServerVersion(ctx, refreshServerVersionDeadline)
	} else {
		// If versionGetter is nil, stop the timer.
		refreshVersionTicker.Stop()
	}

	// State for metrics gathering and sending.
	var breakdownMetricsLimitWarningLogged bool
	var stats TracerStats
	var metrics Metrics
	var sentMetrics chan<- struct{}
	var gatheringMetrics bool
	// metricsTimerStart is zero whenever the metrics timer is not running.
	var metricsTimerStart time.Time
	metricsBuffer := ringbuffer.New(t.metricsBufferSize)
	gatheredMetrics := make(chan struct{}, 1)
	// As with requestTimer, start with a stopped, drained timer.
	metricsTimer := time.NewTimer(0)
	if !metricsTimer.Stop() {
		<-metricsTimer.C
	}

	// State for the (optional) remote config watcher.
	var lastConfigChange map[string]string
	var configChanges <-chan apmconfig.Change
	var stopConfigWatcher func()
	defer func() {
		if stopConfigWatcher != nil {
			stopConfigWatcher()
		}
	}()

	cpuProfilingState := newCPUProfilingState(t.profileSender)
	heapProfilingState := newHeapProfilingState(t.profileSender)

	var cfg tracerConfig
	buffer := ringbuffer.New(t.bufferSize)
	// Count blocks evicted from the ring buffer as dropped events.
	buffer.Evicted = func(h ringbuffer.BlockHeader) {
		switch h.Tag {
		case errorBlockTag:
			stats.ErrorsDropped++
		case spanBlockTag:
			stats.SpansDropped++
		case transactionBlockTag:
			stats.TransactionsDropped++
		}
	}
	modelWriter := modelWriter{
		buffer:        buffer,
		metricsBuffer: metricsBuffer,
		cfg:           &cfg,
		stats:         &stats,
	}

	// handleTracerConfigCommand applies cmd to cfg and then brings the
	// profiling state and the metrics timer in line with the new config.
	// While cfg.recording is false all intervals are treated as zero.
	handleTracerConfigCommand := func(cmd tracerConfigCommand) {
		var oldMetricsInterval time.Duration
		if cfg.recording {
			oldMetricsInterval = cfg.metricsInterval
		}
		cmd(&cfg)
		var metricsInterval, cpuProfileInterval, cpuProfileDuration, heapProfileInterval time.Duration
		if cfg.recording {
			metricsInterval = cfg.metricsInterval
			cpuProfileInterval = cfg.cpuProfileInterval
			cpuProfileDuration = cfg.cpuProfileDuration
			heapProfileInterval = cfg.heapProfileInterval
		}

		cpuProfilingState.updateConfig(cpuProfileInterval, cpuProfileDuration)
		heapProfilingState.updateConfig(heapProfileInterval, 0)
		// The timer is left alone while a gather is in progress; it is
		// re-armed when the gather completes (see the gatheredMetrics case).
		if !gatheringMetrics && metricsInterval != oldMetricsInterval {
			if metricsTimerStart.IsZero() {
				// Timer not running: start it if metrics are now enabled.
				if metricsInterval > 0 {
					metricsTimer.Reset(metricsInterval)
					metricsTimerStart = time.Now()
				}
			} else {
				if metricsInterval <= 0 {
					// Metrics disabled: stop and drain the running timer.
					metricsTimerStart = time.Time{}
					if !metricsTimer.Stop() {
						<-metricsTimer.C
					}
				} else {
					// Interval changed: re-arm the timer, crediting the
					// time that has already elapsed since it was started.
					alreadyPassed := time.Since(metricsTimerStart)
					if alreadyPassed >= metricsInterval {
						metricsTimer.Reset(0)
					} else {
						metricsTimer.Reset(metricsInterval - alreadyPassed)
					}
				}
			}
		}
	}

	for {
		var gatherMetrics bool
		select {
		case <-t.closing:
			cancelContext() // informs transport that EOF is expected
			iochanReader.CloseRead(io.EOF)
			return
		case cmd := <-t.configCommands:
			handleTracerConfigCommand(cmd)
			continue
		case cw := <-t.configWatcher:
			// A new watcher (possibly nil) replaces the current one: stop
			// the old watcher and clear the config it last reported.
			if configChanges != nil {
				stopConfigWatcher()
				t.updateRemoteConfig(cfg.logger, lastConfigChange, nil)
				lastConfigChange = nil
				configChanges = nil
			}
			if cw == nil {
				continue
			}
			var configWatcherContext context.Context
			var watchParams apmconfig.WatchParams
			watchParams.Service.Name = t.service.Name
			watchParams.Service.Environment = t.service.Environment
			configWatcherContext, stopConfigWatcher = context.WithCancel(ctx)
			configChanges = cw.WatchConfig(configWatcherContext, watchParams)
			// Silence go vet's "possible context leak" false positive.
			// We call a previous stopConfigWatcher before reassigning
			// the variable, and we have a defer at the top level of the
			// loop method that will call the final stopConfigWatcher
			// value on method exit.
			_ = stopConfigWatcher
			continue
		case change, ok := <-configChanges:
			if !ok {
				// Channel closed: a nil channel is never selected again.
				configChanges = nil
				continue
			}
			if change.Err != nil {
				if cfg.logger != nil {
					cfg.logger.Errorf("config request failed: %s", change.Err)
				}
			} else {
				t.updateRemoteConfig(cfg.logger, lastConfigChange, change.Attrs)
				lastConfigChange = change.Attrs
				// Re-read the recording flag from the instrumentation
				// config and re-evaluate timers/profiling accordingly.
				handleTracerConfigCommand(func(cfg *tracerConfig) {
					cfg.recording = t.instrumentationConfig().recording
				})
			}
			continue
		case <-refreshVersionTicker.C:
			go t.maybeRefreshServerVersion(ctx, refreshServerVersionDeadline)
		case event := <-t.events:
			switch event.eventType {
			case transactionEvent:
				if !t.breakdownMetrics.recordTransaction(event.tx.TransactionData) {
					// Log the limit warning at most once per loop run.
					if !breakdownMetricsLimitWarningLogged && cfg.logger != nil {
						cfg.logger.Warningf("%s", breakdownMetricsLimitWarning)
						breakdownMetricsLimitWarningLogged = true
					}
				}
				// Drop unsampled transactions when the APM Server is >= 8.0
				drop := t.maybeDropTransaction(
					ctx, event.tx.TransactionData, event.tx.Sampled(),
				)
				if !drop {
					modelWriter.writeTransaction(event.tx.Transaction, event.tx.TransactionData)
				}
			case spanEvent:
				modelWriter.writeSpan(event.span.Span, event.span.SpanData)
			case errorEvent:
				modelWriter.writeError(event.err)
				// Flush the buffer to transmit the error immediately.
				flushRequest = true
			}
		case <-requestTimer.C:
			// The request has been open for cfg.requestDuration: close it.
			requestTimerActive = false
			closeRequest = true
		case <-metricsTimer.C:
			metricsTimerStart = time.Time{}
			// Only start a new gather if one is not already in progress.
			gatherMetrics = !gatheringMetrics
		case sentMetrics = <-t.forceSendMetrics:
			if cfg.recording {
				if !metricsTimerStart.IsZero() {
					// Stop and drain the running timer; it is re-armed
					// once the gather completes.
					if !metricsTimer.Stop() {
						<-metricsTimer.C
					}
					metricsTimerStart = time.Time{}
				}
				gatherMetrics = !gatheringMetrics
			}
		case <-gatheredMetrics:
			modelWriter.writeMetrics(&metrics)
			gatheringMetrics = false
			flushRequest = true
			if cfg.recording && cfg.metricsInterval > 0 {
				metricsTimerStart = time.Now()
				metricsTimer.Reset(cfg.metricsInterval)
			}
		case <-cpuProfilingState.timer.C:
			cpuProfilingState.start(ctx, cfg.logger, t.metadataReader())
		case <-cpuProfilingState.finished:
			cpuProfilingState.resetTimer()
		case <-heapProfilingState.timer.C:
			heapProfilingState.start(ctx, cfg.logger, t.metadataReader())
		case <-heapProfilingState.finished:
			heapProfilingState.resetTimer()
		case flushed = <-t.forceFlush:
			// Drain any objects buffered in the channels.
			for n := len(t.events); n > 0; n-- {
				event := <-t.events
				switch event.eventType {
				case transactionEvent:
					if !t.breakdownMetrics.recordTransaction(event.tx.TransactionData) {
						if !breakdownMetricsLimitWarningLogged && cfg.logger != nil {
							cfg.logger.Warningf("%s", breakdownMetricsLimitWarning)
							breakdownMetricsLimitWarningLogged = true
						}
					}
					// Drop unsampled transactions when the APM Server is >= 8.0
					drop := t.maybeDropTransaction(
						ctx, event.tx.TransactionData, event.tx.Sampled(),
					)
					if !drop {
						modelWriter.writeTransaction(event.tx.Transaction, event.tx.TransactionData)
					}
				case spanEvent:
					modelWriter.writeSpan(event.span.Span, event.span.SpanData)
				case errorEvent:
					modelWriter.writeError(event.err)
				}
			}
			// Nothing in flight and nothing buffered: the flush is
			// already complete, so acknowledge it immediately.
			if !requestActive && buffer.Len() == 0 && metricsBuffer.Len() == 0 {
				flushed <- struct{}{}
				continue
			}
			// Otherwise close the request; the flush is acknowledged
			// when the request result arrives.
			closeRequest = true
		case req = <-iochanReader.C:
			// The transport wants more data; it is served at the bottom
			// of this iteration (or a later one) from requestBuf.
		case err := <-requestResult:
			if err != nil {
				stats.Errors.SendStream++
				gracePeriod = nextGracePeriod(gracePeriod)
				if cfg.logger != nil {
					logf := cfg.logger.Debugf
					if err, ok := err.(*transport.HTTPError); ok && err.Response.StatusCode == 404 {
						// 404 typically means the server is too old, meaning
						// the error is due to a misconfigured environment.
						logf = cfg.logger.Errorf
					}
					logf("request failed: %s (next request in ~%s)", err, gracePeriod)
				}
			} else {
				gracePeriod = -1 // Reset grace period after success.
				stats.TransactionsSent += requestBufTransactions
				stats.SpansSent += requestBufSpans
				stats.ErrorsSent += requestBufErrors
				if cfg.logger != nil {
					// s returns the plural suffix for n.
					s := func(n uint64) string {
						if n != 1 {
							return "s"
						}
						return ""
					}
					cfg.logger.Debugf(
						"sent request with %d transaction%s, %d span%s, %d error%s, %d metricset%s",
						requestBufTransactions, s(requestBufTransactions),
						requestBufSpans, s(requestBufSpans),
						requestBufErrors, s(requestBufErrors),
						requestBufMetricsets, s(requestBufMetricsets),
					)
				}
			}
			// Publish stats before notifying any waiters below.
			if !stats.isZero() {
				t.stats.accumulate(stats)
				stats = TracerStats{}
			}
			// Only acknowledge SendMetrics if this request actually
			// carried metricsets.
			if sentMetrics != nil && requestBufMetricsets > 0 {
				sentMetrics <- struct{}{}
				sentMetrics = nil
			}
			if flushed != nil {
				flushed <- struct{}{}
				flushed = nil
			}
			if req.Buf != nil {
				// req will be canceled by CloseRead below.
				req.Buf = nil
			}
			// Retire the reader used by the finished request and reset
			// all per-request state for the next one.
			iochanReader.CloseRead(io.EOF)
			iochanReader = iochan.NewReader()
			flushRequest = false
			closeRequest = false
			requestActive = false
			requestBytesRead = 0
			requestBuf.Reset()
			requestBufTransactions = 0
			requestBufSpans = 0
			requestBufErrors = 0
			requestBufMetricsets = 0
			if requestTimerActive {
				if !requestTimer.Stop() {
					<-requestTimer.C
				}
				requestTimerActive = false
			}
		}

		// Publish any stats accumulated while handling the event above.
		if !stats.isZero() {
			t.stats.accumulate(stats)
			stats = TracerStats{}
		}

		if gatherMetrics {
			gatheringMetrics = true
			metrics.disabled = cfg.disabledMetrics
			t.gatherMetrics(ctx, cfg.metricsGatherers, &metrics, cfg.logger, gatheredMetrics)
			if cfg.logger != nil {
				cfg.logger.Debugf("gathering metrics")
			}
		}

		if !requestActive {
			if buffer.Len() == 0 && metricsBuffer.Len() == 0 {
				// Nothing to send; wait for the next event.
				continue
			}
			// Start a new stream request: wake the sender goroutine and
			// begin a fresh zlib stream that opens with the metadata.
			sendStreamRequest <- gracePeriod
			if metadata == nil {
				metadata = t.jsonRequestMetadata()
			}
			zlibWriter.Reset(&requestBuf)
			zlibWriter.Write(metadata)
			zlibFlushed = false
			zlibClosed = false
			requestActive = true
			requestTimer.Reset(cfg.requestDuration)
			requestTimerActive = true
		}

		if !closeRequest || !zlibClosed {
			// Move buffered blocks into the compressed request body until
			// the request size limit is reached or the buffers are empty.
			// Metrics blocks are written before other events.
			for requestBytesRead+requestBuf.Len() < cfg.requestSize {
				if metricsBuffer.Len() > 0 {
					if _, _, err := metricsBuffer.WriteBlockTo(zlibWriter); err == nil {
						requestBufMetricsets++
						zlibWriter.Write([]byte("\n"))
						zlibFlushed = false
						if sentMetrics != nil {
							// SendMetrics was called: close the request
							// off so we can inform the user when the
							// metrics have been processed.
							closeRequest = true
						}
					}
					continue
				}
				if buffer.Len() == 0 {
					break
				}
				if h, _, err := buffer.WriteBlockTo(zlibWriter); err == nil {
					switch h.Tag {
					case transactionBlockTag:
						requestBufTransactions++
					case spanBlockTag:
						requestBufSpans++
					case errorBlockTag:
						requestBufErrors++
					}
					zlibWriter.Write([]byte("\n"))
					zlibFlushed = false
				}
			}
			if !closeRequest {
				// Close the request once it has reached the size limit.
				closeRequest = requestBytesRead+requestBuf.Len() >= cfg.requestSize
			}
		}
		if closeRequest {
			if !zlibClosed {
				zlibWriter.Close()
				zlibClosed = true
			}
		} else if flushRequest && !zlibFlushed {
			zlibWriter.Flush()
			flushRequest = false
			zlibFlushed = true
		}

		// Serve the pending read request, if there is one and there is
		// compressed data available.
		if req.Buf == nil || requestBuf.Len() == 0 {
			continue
		}
		const zlibHeaderLen = 2
		// Hold back until more than just the zlib header is available.
		if requestBytesRead+requestBuf.Len() > zlibHeaderLen {
			n, err := requestBuf.Read(req.Buf)
			if closeRequest && err == nil && requestBuf.Len() == 0 {
				// The request is closed and fully drained: signal EOF
				// together with the final bytes.
				err = io.EOF
			}
			req.Respond(n, err)
			req.Buf = nil
			if n > 0 {
				requestBytesRead += n
			}
		}
	}
}

// jsonRequestMetadata returns a JSON-encoded metadata object that features
// at the
head of every request body. This is called exactly once, when the 1322 // first request is made. 1323 func (t *Tracer) jsonRequestMetadata() []byte { 1324 var json fastjson.Writer 1325 json.RawString(`{"metadata":`) 1326 t.encodeRequestMetadata(&json) 1327 json.RawString("}\n") 1328 return json.Bytes() 1329 } 1330 1331 // metadataReader returns an io.Reader that holds the JSON-encoded metadata, 1332 // suitable for including in a profile request. 1333 func (t *Tracer) metadataReader() io.Reader { 1334 var metadata fastjson.Writer 1335 t.encodeRequestMetadata(&metadata) 1336 return bytes.NewReader(metadata.Bytes()) 1337 } 1338 1339 func (t *Tracer) encodeRequestMetadata(json *fastjson.Writer) { 1340 json.RawString(`{"system":`) 1341 t.system.MarshalFastJSON(json) 1342 json.RawString(`,"process":`) 1343 t.process.MarshalFastJSON(json) 1344 json.RawString(`,"service":`) 1345 t.service.MarshalFastJSON(json) 1346 if cloud := getCloudMetadata(); cloud != nil { 1347 json.RawString(`,"cloud":`) 1348 cloud.MarshalFastJSON(json) 1349 } 1350 if len(t.globalLabels) > 0 { 1351 json.RawString(`,"labels":`) 1352 t.globalLabels.MarshalFastJSON(json) 1353 } 1354 json.RawByte('}') 1355 } 1356 1357 // gatherMetrics gathers metrics from each of the registered 1358 // metrics gatherers. Once all gatherers have returned, a value 1359 // will be sent on the "gathered" channel. 
1360 func (t *Tracer) gatherMetrics(ctx context.Context, gatherers []MetricsGatherer, m *Metrics, l Logger, gathered chan<- struct{}) { 1361 timestamp := model.Time(time.Now().UTC()) 1362 var group sync.WaitGroup 1363 for _, g := range gatherers { 1364 group.Add(1) 1365 go func(g MetricsGatherer) { 1366 defer group.Done() 1367 gatherMetrics(ctx, g, m, l) 1368 }(g) 1369 } 1370 go func() { 1371 group.Wait() 1372 for _, m := range m.transactionGroupMetrics { 1373 m.Timestamp = timestamp 1374 } 1375 for _, m := range m.metrics { 1376 m.Timestamp = timestamp 1377 } 1378 gathered <- struct{}{} 1379 }() 1380 } 1381 1382 // maybeDropTransaction may drop a transaction, for example when the transaction 1383 // is non-sampled and the target server version is 8.0 or greater. 1384 // maybeDropTransaction returns true if the transaction is dropped, false otherwise. 1385 func (t *Tracer) maybeDropTransaction(ctx context.Context, td *TransactionData, sampled bool) bool { 1386 if sampled || t.versionGetter == nil { 1387 return false 1388 } 1389 1390 v := t.versionGetter.MajorServerVersion(ctx, false) 1391 dropUnsampled := v >= 8 1392 if dropUnsampled { 1393 td.reset(t) 1394 } 1395 return dropUnsampled 1396 } 1397 1398 // maybeRefreshServerVersion refreshes the remote APM Server version if the version 1399 // has been marked as stale. 1400 func (t *Tracer) maybeRefreshServerVersion(ctx context.Context, deadline time.Duration) { 1401 if t.versionGetter == nil { 1402 return 1403 } 1404 1405 // Fast path, when the version has been cached, there's nothing to do. 1406 if v := t.versionGetter.MajorServerVersion(ctx, false); v > 0 { 1407 return 1408 } 1409 1410 // If there isn't a cached version, try to refresh the version. 
1411 if deadline > 0 { 1412 var cancel context.CancelFunc 1413 ctx, cancel = context.WithTimeout(ctx, deadline) 1414 defer cancel() 1415 } 1416 _ = t.versionGetter.MajorServerVersion(ctx, true) 1417 return 1418 } 1419 1420 type tracerEventType int 1421 1422 const ( 1423 transactionEvent tracerEventType = iota 1424 spanEvent 1425 errorEvent 1426 ) 1427 1428 type tracerEvent struct { 1429 eventType tracerEventType 1430 1431 // err is set only if eventType == errorEvent. 1432 err *ErrorData 1433 1434 // tx is set only if eventType == transactionEvent. 1435 tx struct { 1436 *Transaction 1437 // Transaction.TransactionData is nil at the 1438 // point tracerEvent is created (to signify 1439 // that the transaction is ended), so we pass 1440 // it along side. 1441 *TransactionData 1442 } 1443 1444 // span is set only if eventType == spanEvent. 1445 span struct { 1446 *Span 1447 // Span.SpanData is nil at the point tracerEvent 1448 // is created (to signify that the span is ended), 1449 // so we pass it along side. 1450 *SpanData 1451 } 1452 } 1453 1454 type majorVersionGetter interface { 1455 // MajorServerVersion returns the APM Server's major version. When refreshStale 1456 // is true` it will request the remote APM Server's version from `/`, otherwise 1457 // it will return the cached version. If the returned first argument is 0, the 1458 // cache is stale. 1459 MajorServerVersion(ctx context.Context, refreshStale bool) uint32 1460 } 1461 1462 func parseGlobalLabels() model.StringMap { 1463 var labels model.StringMap 1464 for _, kv := range configutil.ParseListEnv(envGlobalLabels, ",", nil) { 1465 i := strings.IndexRune(kv, '=') 1466 if i > 0 { 1467 k, v := strings.TrimSpace(kv[:i]), strings.TrimSpace(kv[i+1:]) 1468 labels = append(labels, model.StringMapItem{ 1469 Key: cleanLabelKey(k), 1470 Value: truncateString(v), 1471 }) 1472 } 1473 } 1474 return labels 1475 }