github.com/epsagon/epsagon-go@v1.39.0/tracer/tracer.go (about) 1 package tracer 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "errors" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "log" 11 "net/http" 12 "os" 13 "runtime" 14 "runtime/debug" 15 "strconv" 16 "strings" 17 "sync" 18 "time" 19 20 "github.com/epsagon/epsagon-go/protocol" 21 "github.com/golang/protobuf/jsonpb" 22 ) 23 24 var ( 25 mutex sync.Mutex 26 // GlobalTracer A global Tracer for all internal uses 27 GlobalTracer Tracer 28 ) 29 30 // DefaultMaxTraceSizeKB is the default maximum trace size (in KB) 31 const DefaultMaxTraceSizeKB = 64 32 33 // DefaultMaxTraceSize is the default maximum trace size (in bytes) 34 const DefaultMaxTraceSize = DefaultMaxTraceSizeKB * 1024 35 36 // MaxTraceSizeKB is the maximum allowed trace size (in KB) 37 const MaxTraceSizeKB = 512 38 39 // MaxTraceSize is the maximum allowed trace size (in bytes) 40 const MaxTraceSize = MaxTraceSizeKB * 1024 41 42 // MaxLabelsSize is the maximum allowed total labels size (in bytes) 43 const MaxLabelsSize = 10 * 1024 44 45 // MaxTraceSizeEnvVar max trace size environment variable 46 const MaxTraceSizeEnvVar = "EPSAGON_MAX_TRACE_SIZE" 47 48 // LabelsKey is the key for labels in resource metadata 49 const LabelsKey = "labels" 50 51 const IsTrimmedKey = "is_trimmed" 52 53 const EpsagonHTTPTraceIDKey = "http_trace_id" 54 const EpsagonRequestTraceIDKey = "request_trace_id" 55 const AwsServiceKey = "aws.service" 56 57 var strongKeys = map[string]bool{ 58 EpsagonHTTPTraceIDKey: true, 59 EpsagonRequestTraceIDKey: true, 60 AwsServiceKey: true, 61 LabelsKey: true, 62 "aws_account": true, 63 "region": true, 64 "log_group_name": true, 65 "log_stream_name": true, 66 "sequence_number": true, 67 "item_hash": true, 68 } 69 70 // threshold in milliseconds to send the trace before a Lambda timeout occurs 71 const DefaultLambdaTimeoutThresholdMs = 200 72 73 func GetLambdaTimeoutThresholdMs() int { 74 timeoutThresholdMs := DefaultLambdaTimeoutThresholdMs 75 userDefinedThreshold, ok := os.LookupEnv("EPSAGON_LAMBDA_TIMEOUT_THRESHOLD_MS") 76 if ok { 77 if userDefinedThreshold, err := strconv.Atoi(userDefinedThreshold); err == nil { 78 timeoutThresholdMs = userDefinedThreshold 79 } 80 } 81 return timeoutThresholdMs 82 } 83 84 // Tracer is what a general program tracer has to provide 85 type Tracer interface { 86 AddEvent(*protocol.Event) 87 AddException(*protocol.Exception) 88 AddExceptionTypeAndMessage(string, string) 89 // AddLabel Adds a label to the trace that will be sent 90 AddLabel(string, interface{}) 91 // AddError Set an error to the trace that will be sent on the runner event 92 AddError(string, interface{}) 93 // GetRunnerEvent Returns the first event with "runner" as its Origin 94 GetRunnerEvent() *protocol.Event 95 // Starts the tracer event data collection 96 Start() 97 Running() bool 98 // Stop the tracer collecting data and send trace 99 SendStopSignal() 100 // Stop the tracer collecting data and send trace, waiting 101 // for the tracer to finish running 102 Stop() 103 Stopped() bool 104 GetConfig() *Config 105 } 106 107 // Config is the configuration for Epsagon's tracer 108 type Config struct { 109 ApplicationName string // Application name in Epsagon 110 Token string // Epsgaon Token 111 CollectorURL string // Epsagon collector url 112 MetadataOnly bool // Only send metadata about the event 113 Debug bool // Print Epsagon debug information 114 SendTimeout string // Timeout for sending traces to Epsagon 115 Disable bool // Disable sending traces 116 TestMode bool // TestMode sending traces 117 IgnoredKeys []string // IgnoredKeys are keys that will be masked from events metadata 118 MaxTraceSize int // MaxTraceSize is the maximum allowed trace size (in bytes) 119 } 120 121 type epsagonLabel struct { 122 key string 123 value interface{} 124 } 125 126 type epsagonTracer struct { 127 Config *Config 128 129 eventsPipe chan *protocol.Event 130 events []*protocol.Event 131 runnerExceptionPipe chan *protocol.Exception 132 exceptionsPipe chan *protocol.Exception 133 labelsPipe chan epsagonLabel 134 exceptions []*protocol.Exception 135 runnerException *protocol.Exception 136 labels map[string]interface{} 137 labelsSize int 138 139 closeCmd chan struct{} 140 stopped chan struct{} 141 running chan struct{} 142 } 143 144 // Start starts running the tracer in another goroutine and returns 145 // when it is ready, or after 1 second timeout 146 func (tracer *epsagonTracer) Start() { 147 go tracer.Run() 148 timer := time.NewTimer(time.Second) 149 select { 150 case <-tracer.running: 151 return 152 case <-timer.C: 153 log.Println("Epsagon Tracer couldn't start after one second timeout") 154 } 155 } 156 157 func (tracer *epsagonTracer) maskIgnoredKeys() { 158 for _, event := range tracer.events { 159 tracer.maskEventIgnoredKeys(event, tracer.Config.IgnoredKeys) 160 } 161 } 162 163 func (tracer *epsagonTracer) sendTraces() { 164 tracer.maskIgnoredKeys() 165 tracesReader, err := tracer.getTraceReader() 166 if err != nil { 167 // TODO create an exception and send a trace only with that 168 log.Printf("Epsagon: Encountered an error while marshaling the traces: %v\n", err) 169 return 170 } 171 sendTimeout, err := time.ParseDuration(tracer.Config.SendTimeout) 172 if err != nil { 173 if tracer.Config.Debug { 174 log.Printf("Epsagon: Encountered an error while parsing send timeout: %v, using '1s'\n", err) 175 } 176 sendTimeout, _ = time.ParseDuration("1s") 177 } 178 179 client := &http.Client{Timeout: sendTimeout} 180 181 if !tracer.Config.Disable { 182 if len(tracer.Config.Token) == 0 { 183 if tracer.Config.Debug { 184 log.Printf("Epsagon: empty token, not sending traces\n") 185 } 186 return 187 } 188 req, err := http.NewRequest(http.MethodPost, tracer.Config.CollectorURL, tracesReader) 189 if err == nil { 190 req.Header.Set("Content-Type", "application/json") 191 req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", tracer.Config.Token)) 192 HandleSendTracesResponse(client.Do(req)) 193 } else { 194 if tracer.Config.Debug { 195 log.Printf("Epsagon: Encountered an error while trying to send traces: %v\n", err) 196 } 197 } 198 199 } 200 } 201 202 // HandleSendTracesResponse handles responses from the trace collector 203 func HandleSendTracesResponse(resp *http.Response, err error) { 204 if err != nil { 205 log.Printf("Error while sending traces \n%v", err) 206 return 207 } 208 defer resp.Body.Close() 209 if resp.StatusCode >= http.StatusInternalServerError { 210 //safe to ignore the error here 211 respBody, _ := ioutil.ReadAll(resp.Body) 212 log.Printf("Error while sending traces \n%v", string(respBody)) 213 } 214 } 215 216 // GetRunnerEvent Gets the runner event, nil if not found 217 func (tracer *epsagonTracer) GetRunnerEvent() *protocol.Event { 218 for _, event := range tracer.events { 219 if event.Origin == "runner" { 220 return event 221 } 222 } 223 return nil 224 } 225 226 func (tracer *epsagonTracer) addRunnerLabels(event *protocol.Event) { 227 jsonString, err := json.Marshal(tracer.labels) 228 if err != nil { 229 if tracer.Config.Debug { 230 log.Printf("EPSAGON DEBUG failed appending labels") 231 } 232 } else { 233 event.Resource.Metadata[LabelsKey] = string(jsonString) 234 } 235 } 236 237 func (tracer *epsagonTracer) addRunnerException(event *protocol.Event) { 238 if tracer.runnerException != nil { 239 event.Exception = tracer.runnerException 240 event.ErrorCode = protocol.ErrorCode_ERROR 241 } 242 } 243 244 func isStrongKey(key string) bool { 245 _, ok := strongKeys[key] 246 return ok 247 } 248 249 func (tracer *epsagonTracer) stripEvents(traceLength int, marshaler *jsonpb.Marshaler) bool { 250 originalTraceLength := traceLength / 1024 251 eventSize := 0 252 for _, event := range tracer.events { 253 eventJSON, err := marshaler.MarshalToString(event) 254 if err != nil { 255 continue 256 } 257 eventSize = len(eventJSON) 258 for key, _ := range event.Resource.Metadata { 259 if !isStrongKey(key) { 260 delete(event.Resource.Metadata, key) 261 } 262 } 263 eventJSON, err = marshaler.MarshalToString(event) 264 if err != nil { 265 continue 266 } 267 strippedSize := eventSize - len(eventJSON) 268 traceLength -= strippedSize 269 if traceLength <= tracer.Config.MaxTraceSize { 270 if tracer.Config.Debug { 271 traceLength := traceLength / 1024 272 log.Printf("EPSAGON DEBUG trimmed trace from %dKB to %dKB (max allowed size: %dKB)", originalTraceLength, traceLength, MaxTraceSizeKB) 273 } 274 return true 275 } 276 } 277 return false 278 } 279 280 func (tracer *epsagonTracer) getTraceJSON(trace *protocol.Trace, runnerEvent *protocol.Event) (traceJSON string, err error) { 281 marshaler := jsonpb.Marshaler{ 282 EnumsAsInts: true, EmitDefaults: true, OrigName: true} 283 traceJSON, err = marshaler.MarshalToString(trace) 284 if err != nil { 285 return 286 } 287 traceLength := len(traceJSON) 288 if traceLength > tracer.Config.MaxTraceSize { 289 ok := tracer.stripEvents(traceLength, &marshaler) 290 if !ok { 291 err = errors.New(fmt.Sprintf("Trace is too big (max allowed size: %dKB)", tracer.Config.MaxTraceSize/1024)) 292 return 293 } 294 runnerEvent.Resource.Metadata[IsTrimmedKey] = "true" 295 traceJSON, err = marshaler.MarshalToString(trace) 296 } 297 return 298 } 299 300 func (tracer *epsagonTracer) getTraceReader() (io.Reader, error) { 301 version := "go " + runtime.Version() 302 runnerEvent := tracer.GetRunnerEvent() 303 if runnerEvent != nil { 304 tracer.addRunnerLabels(runnerEvent) 305 tracer.addRunnerException(runnerEvent) 306 } 307 trace := protocol.Trace{ 308 AppName: tracer.Config.ApplicationName, 309 Token: tracer.Config.Token, 310 Events: tracer.events, 311 Exceptions: tracer.exceptions, 312 Version: VERSION, 313 Platform: version, 314 } 315 if tracer.Config.Debug { 316 log.Printf("EPSAGON DEBUG sending trace: %+v\n", trace) 317 } 318 traceJSON, err := tracer.getTraceJSON(&trace, runnerEvent) 319 if err != nil { 320 return nil, err 321 } 322 if tracer.Config.Debug { 323 log.Printf("Final Traces: %s ", traceJSON) 324 } 325 return bytes.NewBuffer([]byte(traceJSON)), nil 326 } 327 328 func isChannelPinged(ch chan struct{}) bool { 329 select { 330 case <-ch: 331 return true 332 default: 333 return false 334 } 335 } 336 337 // Running return true iff the tracer has been running 338 func (tracer *epsagonTracer) Running() bool { 339 return isChannelPinged(tracer.running) 340 } 341 342 // Stopped return true iff the tracer has been closed 343 func (tracer *epsagonTracer) Stopped() bool { 344 return isChannelPinged(tracer.stopped) 345 } 346 347 func fillConfigDefaults(config *Config) { 348 if !config.Debug { 349 if strings.ToUpper(os.Getenv("EPSAGON_DEBUG")) == "TRUE" { 350 config.Debug = true 351 } 352 } 353 if len(config.Token) == 0 { 354 config.Token = os.Getenv("EPSAGON_TOKEN") 355 if config.Debug { 356 log.Println("EPSAGON DEBUG: setting token from environment variable") 357 } 358 } 359 if config.MaxTraceSize > MaxTraceSize || config.MaxTraceSize < 0 { 360 config.MaxTraceSize = DefaultMaxTraceSize 361 if config.Debug { 362 log.Printf("EPSAGON DEBUG: MaxTraceSize is invalid (must be <= %dKB), using default size (%dKB)\n", MaxTraceSizeKB, DefaultMaxTraceSizeKB) 363 } 364 } else { 365 rawTraceSize := os.Getenv(MaxTraceSizeEnvVar) 366 maxTraceSize, err := strconv.Atoi(rawTraceSize) 367 if err != nil || maxTraceSize <= 0 || maxTraceSize > MaxTraceSize { 368 config.MaxTraceSize = DefaultMaxTraceSize 369 } else { 370 config.MaxTraceSize = maxTraceSize 371 if config.Debug { 372 log.Printf("EPSAGON DEBUG: setting max trace size (%dKB) from environment variable\n", maxTraceSize/1024) 373 } 374 } 375 } 376 if config.MetadataOnly { 377 if strings.ToUpper(os.Getenv("EPSAGON_METADATA")) == "FALSE" { 378 config.MetadataOnly = false 379 } 380 } 381 if len(config.CollectorURL) == 0 { 382 envURL := os.Getenv("EPSAGON_COLLECTOR_URL") 383 if len(envURL) != 0 { 384 config.CollectorURL = envURL 385 } else { 386 region := os.Getenv("AWS_REGION") 387 if len(region) != 0 { 388 config.CollectorURL = fmt.Sprintf("https://%s.tc.epsagon.com", region) 389 } else { 390 config.CollectorURL = "https://us-east-1.tc.epsagon.com" 391 } 392 } 393 if config.Debug { 394 log.Printf("EPSAGON DEBUG: setting collector url to %s\n", config.CollectorURL) 395 } 396 } 397 sendTimeout := os.Getenv("EPSAGON_SEND_TIMEOUT_SEC") 398 if len(sendTimeout) != 0 { 399 config.SendTimeout = sendTimeout 400 if config.Debug { 401 log.Println("EPSAGON DEBUG: setting send timeout from environment variable") 402 } 403 } 404 } 405 406 // CreateTracer will initiallize a new epsagon tracer 407 func CreateTracer(config *Config) Tracer { 408 if config.TestMode { 409 return GlobalTracer 410 } 411 if config == nil { 412 config = &Config{} 413 } 414 fillConfigDefaults(config) 415 tracer := &epsagonTracer{ 416 Config: config, 417 eventsPipe: make(chan *protocol.Event), 418 events: make([]*protocol.Event, 0, 0), 419 exceptionsPipe: make(chan *protocol.Exception), 420 runnerExceptionPipe: make(chan *protocol.Exception), 421 exceptions: make([]*protocol.Exception, 0, 0), 422 closeCmd: make(chan struct{}), 423 stopped: make(chan struct{}), 424 running: make(chan struct{}), 425 labels: make(map[string]interface{}), 426 labelsPipe: make(chan epsagonLabel), 427 } 428 if config.Debug { 429 log.Println("EPSAGON DEBUG: Created a new tracer") 430 } 431 return tracer 432 } 433 434 // CreateTracer will initiallize a global epsagon tracer 435 func CreateGlobalTracer(config *Config) Tracer { 436 mutex.Lock() 437 defer mutex.Unlock() 438 if GlobalTracer != nil && !GlobalTracer.Stopped() { 439 log.Println("The tracer is already created, Closing and Creating.") 440 StopGlobalTracer() 441 } 442 GlobalTracer = CreateTracer(config) 443 return GlobalTracer 444 } 445 446 // AddException adds a tracing exception to the tracer 447 func (tracer *epsagonTracer) AddException(exception *protocol.Exception) { 448 defer func() { 449 recover() 450 }() 451 tracer.exceptionsPipe <- exception 452 } 453 454 // AddEvent adds an event to the tracer 455 func (tracer *epsagonTracer) AddEvent(event *protocol.Event) { 456 if tracer.Config.Debug { 457 log.Println("EPSAGON DEBUG: Adding event: ", event) 458 } 459 tracer.eventsPipe <- event 460 } 461 462 // AddEvent adds an event to the tracer 463 func AddEvent(event *protocol.Event) { 464 if GlobalTracer == nil || GlobalTracer.Stopped() { 465 // TODO 466 log.Println("The tracer is not initialized!") 467 return 468 } 469 GlobalTracer.AddEvent(event) 470 } 471 472 func (tracer *epsagonTracer) verifyLabel(label epsagonLabel) bool { 473 var valueSize = 0 474 switch label.value.(type) { 475 case int, float64, bool: 476 valueSize = strconv.IntSize 477 case string: 478 valueSize = len(label.value.(string)) 479 default: 480 if tracer.Config.Debug { 481 log.Println("EPSAGON DEBUG: Supported label types are: int, float, string, bool") 482 } 483 return false 484 } 485 if len(label.key)+valueSize+tracer.labelsSize > MaxLabelsSize { 486 return false 487 } 488 489 tracer.labelsSize += len(label.key) + valueSize 490 return true 491 } 492 493 // AddLabel adds a label to the tracer 494 func (tracer *epsagonTracer) AddLabel(key string, value interface{}) { 495 if tracer.Config.Debug { 496 log.Println("EPSAGON DEBUG: Adding label: ", key, value) 497 } 498 label := epsagonLabel{key, value} 499 tracer.labelsPipe <- label 500 } 501 502 // AddLabel adds a label to the tracer 503 func AddLabel(key string, value interface{}) { 504 if GlobalTracer == nil || GlobalTracer.Stopped() { 505 log.Println("The tracer is not initialized!") 506 return 507 } 508 GlobalTracer.AddLabel(key, value) 509 } 510 511 // AddException adds an exception to the tracer 512 func AddException(exception *protocol.Exception) { 513 defer func() { 514 if r := recover(); r != nil { 515 log.Println("Epsagon: Failed to add exception") 516 } 517 }() 518 if GlobalTracer == nil || GlobalTracer.Stopped() { 519 // TODO 520 log.Println("The tracer is not initialized!") 521 return 522 } 523 GlobalTracer.AddException(exception) 524 } 525 526 // Stop stops the tracer running routine 527 func (tracer *epsagonTracer) SendStopSignal() { 528 tracer.closeCmd <- struct{}{} 529 } 530 531 // Stop stops the tracer running routine, waiting for the tracer to finish 532 func (tracer *epsagonTracer) Stop() { 533 select { 534 case <-tracer.stopped: 535 return 536 default: 537 tracer.SendStopSignal() 538 <-tracer.stopped 539 } 540 } 541 542 // StopTracer will close the tracer and send all the data to the collector 543 func StopGlobalTracer() { 544 if GlobalTracer == nil || GlobalTracer.Stopped() { 545 // TODO 546 log.Println("The tracer is not initialized!") 547 return 548 } 549 GlobalTracer.Stop() 550 } 551 552 // Run starts the runner background routine that will 553 // run until it 554 func (tracer *epsagonTracer) Run() { 555 if tracer.Config.Debug { 556 log.Println("EPSAGON DEBUG: tracer started running") 557 } 558 if tracer.Running() { 559 return 560 } 561 close(tracer.running) 562 defer func() { tracer.running = make(chan struct{}) }() 563 defer close(tracer.stopped) 564 565 for { 566 select { 567 case event := <-tracer.eventsPipe: 568 tracer.events = append(tracer.events, event) 569 case exception := <-tracer.exceptionsPipe: 570 tracer.exceptions = append(tracer.exceptions, exception) 571 case exception := <-tracer.runnerExceptionPipe: 572 tracer.runnerException = exception 573 case label := <-tracer.labelsPipe: 574 if tracer.verifyLabel(label) { 575 tracer.labels[label.key] = label.value 576 } 577 case <-tracer.closeCmd: 578 if tracer.Config.Debug { 579 log.Println("EPSAGON DEBUG: tracer stops running, sending traces") 580 } 581 tracer.sendTraces() 582 return 583 } 584 } 585 } 586 587 func (tracer *epsagonTracer) GetConfig() *Config { 588 return tracer.Config 589 } 590 591 // GetGlobalTracerConfig returns the configuration of the global tracer 592 func GetGlobalTracerConfig() *Config { 593 if GlobalTracer == nil || GlobalTracer.Stopped() { 594 return &Config{} 595 } 596 return GlobalTracer.GetConfig() 597 } 598 599 func createException(exceptionType, msg string) *protocol.Exception { 600 stack := debug.Stack() 601 return &protocol.Exception{ 602 Type: exceptionType, 603 Message: msg, 604 Traceback: string(stack), 605 Time: GetTimestamp(), 606 } 607 } 608 609 // AddExceptionTypeAndMessage adds an exception to the current tracer with 610 // the current stack and time. 611 // exceptionType, msg are strings that will be added to the exception 612 func (tracer *epsagonTracer) AddExceptionTypeAndMessage(exceptionType, msg string) { 613 tracer.AddException(createException(exceptionType, msg)) 614 } 615 616 func (tracer *epsagonTracer) AddError(errorType string, value interface{}) { 617 var message string 618 switch value.(type) { 619 case string: 620 message = value.(string) 621 case error: 622 message = value.(error).Error() 623 default: 624 if tracer.Config.Debug { 625 log.Println("EPSAGON DEBUG: Supported error types are: string, error") 626 } 627 return 628 } 629 if tracer.Config.Debug { 630 log.Println("EPSAGON DEBUG: Adding error message to trace: ", message) 631 } 632 exception := createException(errorType, message) 633 tracer.runnerExceptionPipe <- exception 634 }