github.com/pinpoint-apm/pinpoint-go-agent@v1.4.1-0.20240110120318-a50c2eb18c8c/agent.go (about) 1 package pinpoint 2 3 import ( 4 "errors" 5 "io" 6 "strconv" 7 "sync" 8 "sync/atomic" 9 "testing" 10 "time" 11 12 lru "github.com/hashicorp/golang-lru" 13 pb "github.com/pinpoint-apm/pinpoint-go-agent/protobuf" 14 "github.com/spaolacci/murmur3" 15 ) 16 17 func init() { 18 initLogger() 19 initConfig() 20 initGoroutine() 21 globalAgent = NoopAgent() 22 } 23 24 type agent struct { 25 appName string 26 appType int32 27 agentID string 28 agentName string 29 30 startTime int64 31 sequence int64 32 agentGrpc *agentGrpc 33 spanGrpc *spanGrpc 34 statGrpc *statGrpc 35 cmdGrpc *cmdGrpc 36 spanChan chan *span 37 metaChan chan interface{} 38 urlStatChan chan *urlStat 39 statChan chan *pb.PStatMessage 40 sampler traceSampler 41 42 errorCache *lru.Cache 43 errorIdGen int32 44 sqlCache *lru.Cache 45 sqlIdGen int32 46 sqlUidCache *lru.Cache 47 apiCache *lru.Cache 48 apiIdGen int32 49 50 config *Config 51 wg sync.WaitGroup 52 enable bool 53 shutdown bool 54 } 55 56 type apiMeta struct { 57 id int32 58 descriptor string 59 apiType int 60 } 61 62 type stringMeta struct { 63 id int32 64 funcName string 65 } 66 67 type sqlMeta struct { 68 id int32 69 sql string 70 } 71 72 type sqlUidMeta struct { 73 uid []byte 74 sql string 75 } 76 77 type exceptionMeta struct { 78 txId TransactionId 79 spanId int64 80 uriTemplate string 81 exceptions []*exception 82 } 83 84 type exception struct { 85 exceptionId int64 86 callstack *errorWithCallStack 87 } 88 89 const ( 90 cacheSize = 1024 91 defaultQueueSize = 1024 92 ) 93 94 var globalAgent Agent 95 96 // GetAgent returns a global Agent created by NewAgent. 97 func GetAgent() Agent { 98 return globalAgent 99 } 100 101 // NewAgent creates an Agent and spawns goroutines that manage spans and statistical data. 102 // The generated Agent is maintained globally and only one instance is retained. 103 // The provided config is generated by NewConfig and an error is returned if it is nil. 104 // 105 // example: 106 // 107 // opts := []pinpoint.ConfigOption{ 108 // pinpoint.WithAppName("GoTestApp"), 109 // pinpoint.WithConfigFile(os.Getenv("HOME") + "/tmp/pinpoint-config.yaml"), 110 // } 111 // cfg, err := pinpoint.NewConfig(opts...) 112 // agent, err := pinpoint.NewAgent(cfg) 113 func NewAgent(config *Config) (Agent, error) { 114 if globalAgent != NoopAgent() { 115 return globalAgent, errors.New("agent is already created") 116 } 117 if config == nil { 118 return NoopAgent(), errors.New("configuration is missing") 119 } 120 121 logger.setup(config) 122 if err := config.checkNameAndID(); err != nil { 123 return NoopAgent(), err 124 } 125 if !config.Bool(CfgEnable) { 126 return NoopAgent(), nil 127 } 128 129 Log("agent").Infof("new pinpoint agent") 130 config.printConfigString() 131 132 agent := &agent{ 133 appName: config.String(CfgAppName), 134 appType: int32(config.Int(CfgAppType)), 135 agentID: config.String(CfgAgentID), 136 agentName: config.String(CfgAgentName), 137 startTime: time.Now().UnixNano() / int64(time.Millisecond), 138 spanChan: make(chan *span, config.Int(CfgSpanQueueSize)), 139 metaChan: make(chan interface{}, config.Int(CfgSpanQueueSize)), 140 urlStatChan: make(chan *urlStat, config.Int(CfgSpanQueueSize)), 141 statChan: make(chan *pb.PStatMessage, config.Int(CfgSpanQueueSize)), 142 config: config, 143 } 144 145 var err error 146 if agent.errorCache, err = lru.New(cacheSize); err != nil { 147 return NoopAgent(), err 148 } 149 if agent.sqlCache, err = lru.New(cacheSize); err != nil { 150 return NoopAgent(), err 151 } 152 if agent.sqlUidCache, err = lru.New(cacheSize); err != nil { 153 return NoopAgent(), err 154 } 155 if agent.apiCache, err = lru.New(cacheSize); err != nil { 156 return NoopAgent(), err 157 } 158 159 agent.newSampler() 160 samplingOpts := []string{CfgSamplingType, CfgSamplingCounterRate, CfgSamplingPercentRate, CfgSamplingNewThroughput, CfgSamplingContinueThroughput} 161 config.AddReloadCallback(samplingOpts, agent.newSampler) 162 config.AddReloadCallback([]string{CfgLogLevel}, logger.reloadLevel) 163 config.AddReloadCallback([]string{CfgLogOutput, CfgLogMaxSize}, logger.reloadOutput) 164 165 if !config.offGrpc { 166 go agent.connectGrpcServer() 167 } 168 globalAgent = agent 169 return agent, nil 170 } 171 172 func (agent *agent) newSampler() { 173 config := agent.config 174 var baseSampler sampler 175 if config.String(CfgSamplingType) == samplingTypeCounter { 176 baseSampler = newRateSampler(config.Int(CfgSamplingCounterRate)) 177 } else { 178 baseSampler = newPercentSampler(config.Float(CfgSamplingPercentRate)) 179 } 180 181 if config.Int(CfgSamplingNewThroughput) > 0 || config.Int(CfgSamplingContinueThroughput) > 0 { 182 agent.sampler = newThroughputLimitTraceSampler(baseSampler, config.Int(CfgSamplingNewThroughput), 183 config.Int(CfgSamplingContinueThroughput)) 184 } else { 185 agent.sampler = newBasicTraceSampler(baseSampler) 186 } 187 } 188 189 func (agent *agent) connectGrpcServer() { 190 var err error 191 192 if agent.agentGrpc, err = newAgentGrpc(agent); err != nil { 193 return 194 } 195 if !agent.agentGrpc.registerAgentWithRetry() { 196 return 197 } 198 if agent.spanGrpc, err = newSpanGrpc(agent); err != nil { 199 return 200 } 201 if agent.statGrpc, err = newStatGrpc(agent); err != nil { 202 return 203 } 204 if agent.cmdGrpc, err = newCommandGrpc(agent); err != nil { 205 return 206 } 207 208 agent.enable = true 209 go agent.sendPingWorker() 210 go agent.sendSpanWorker() 211 go agent.runCommandService() 212 go agent.sendMetaWorker() 213 go agent.collectAgentStatWorker() 214 go agent.collectUrlStatWorker() 215 go agent.sendUrlStatWorker() 216 go agent.sendStatsWorker() 217 agent.wg.Add(8) 218 } 219 220 func (agent *agent) Shutdown() { 221 agent.shutdown = true 222 Log("agent").Infof("shutdown pinpoint agent") 223 224 if !agent.enable { 225 return 226 } 227 228 agent.enable = false 229 globalAgent = NoopAgent() 230 231 close(agent.spanChan) 232 close(agent.metaChan) 233 close(agent.urlStatChan) 234 close(agent.statChan) 235 236 //To terminate the listening state of the command stream, 237 //close the command grpc channel first 238 if agent.cmdGrpc != nil { 239 agent.cmdGrpc.close() 240 } 241 242 agent.wg.Wait() 243 244 if agent.agentGrpc != nil { 245 agent.agentGrpc.close() 246 } 247 if agent.spanGrpc != nil { 248 agent.spanGrpc.close() 249 } 250 if agent.statGrpc != nil { 251 agent.statGrpc.close() 252 } 253 } 254 255 func (agent *agent) NewSpanTracer(operation string, rpcName string) Tracer { 256 var tracer Tracer 257 258 if agent.enable { 259 reader := &noopDistributedTracingContextReader{} 260 tracer = agent.NewSpanTracerWithReader(operation, rpcName, reader) 261 } else { 262 tracer = NoopTracer() 263 } 264 return tracer 265 } 266 267 func (agent *agent) NewSpanTracerWithReader(operation string, rpcName string, reader DistributedTracingContextReader) Tracer { 268 if !agent.enable || reader == nil { 269 return NoopTracer() 270 } 271 272 sampled := reader.Get(HeaderSampled) 273 if sampled == "s0" { 274 incrUnSampleCont() 275 return newUnSampledSpan(agent, rpcName) 276 } 277 278 tid := reader.Get(HeaderTraceId) 279 if tid == "" { 280 return agent.samplingSpan(func() bool { return agent.sampler.isNewSampled() }, operation, rpcName, reader) 281 } else { 282 return agent.samplingSpan(func() bool { return agent.sampler.isContinueSampled() }, operation, rpcName, reader) 283 } 284 } 285 286 func (agent *agent) samplingSpan(samplingFunc func() bool, operation string, rpcName string, reader DistributedTracingContextReader) Tracer { 287 if samplingFunc() { 288 tracer := newSampledSpan(agent, operation, rpcName) 289 tracer.Extract(reader) 290 return tracer 291 } else { 292 return newUnSampledSpan(agent, rpcName) 293 } 294 } 295 296 func (agent *agent) generateTransactionId() TransactionId { 297 atomic.AddInt64(&agent.sequence, 1) 298 return TransactionId{agent.agentID, agent.startTime, agent.sequence} 299 } 300 301 func (agent *agent) Enable() bool { 302 return agent.enable 303 } 304 305 func (agent *agent) Config() *Config { 306 return agent.config 307 } 308 309 func (agent *agent) sendPingWorker() { 310 Log("agent").Infof("start ping goroutine") 311 defer agent.wg.Done() 312 stream := agent.agentGrpc.newPingStreamWithRetry() 313 314 for agent.enable { 315 err := stream.sendPing() 316 if err != nil { 317 if err != io.EOF { 318 Log("agent").Errorf("send ping - %v", err) 319 } 320 321 stream.close() 322 stream = agent.agentGrpc.newPingStreamWithRetry() 323 } 324 325 time.Sleep(60 * time.Second) 326 } 327 328 stream.close() 329 Log("agent").Infof("end ping goroutine") 330 } 331 332 func (agent *agent) sendSpanWorker() { 333 Log("agent").Infof("start span goroutine") 334 defer agent.wg.Done() 335 336 var ( 337 skipOldSpan = bool(false) 338 skipBaseTime time.Time 339 ) 340 341 stream := agent.spanGrpc.newSpanStreamWithRetry() 342 for span := range agent.spanChan { 343 if !agent.enable { 344 break 345 } 346 347 if skipOldSpan { 348 if span.startTime.Before(skipBaseTime) { 349 continue //skip old span 350 } else { 351 skipOldSpan = false 352 } 353 } 354 355 err := stream.sendSpan(span) 356 if err != nil { 357 if err != io.EOF { 358 Log("agent").Errorf("send span - %v", err) 359 } 360 361 stream.close() 362 stream = agent.spanGrpc.newSpanStreamWithRetry() 363 364 skipOldSpan = true 365 skipBaseTime = time.Now().Add(-time.Second * 1) 366 } 367 } 368 369 stream.close() 370 Log("agent").Infof("end span goroutine") 371 } 372 373 func (agent *agent) enqueueSpan(span *span) bool { 374 if !agent.enable { 375 return false 376 } 377 378 select { 379 case agent.spanChan <- span: 380 return true 381 default: 382 break 383 } 384 385 <-agent.spanChan 386 return false 387 } 388 389 func (agent *agent) sendMetaWorker() { 390 Log("agent").Infof("start meta goroutine") 391 defer agent.wg.Done() 392 393 for md := range agent.metaChan { 394 if !agent.enable { 395 break 396 } 397 398 var success bool 399 switch md.(type) { 400 case apiMeta: 401 api := md.(apiMeta) 402 success = agent.agentGrpc.sendApiMetadataWithRetry(api.id, api.descriptor, -1, api.apiType) 403 break 404 case stringMeta: 405 str := md.(stringMeta) 406 success = agent.agentGrpc.sendStringMetadataWithRetry(str.id, str.funcName) 407 break 408 case sqlMeta: 409 sql := md.(sqlMeta) 410 success = agent.agentGrpc.sendSqlMetadataWithRetry(sql.id, sql.sql) 411 break 412 case sqlUidMeta: 413 sql := md.(sqlUidMeta) 414 success = agent.agentGrpc.sendSqlUidMetadataWithRetry(sql.uid, sql.sql) 415 break 416 case exceptionMeta: 417 em := md.(exceptionMeta) 418 success = agent.agentGrpc.sendExceptionMetadataWithRetry(&em) 419 } 420 421 if !success { 422 agent.deleteMetaCache(md) 423 } 424 } 425 426 Log("agent").Infof("end meta goroutine") 427 } 428 429 func (agent *agent) deleteMetaCache(md interface{}) { 430 switch md.(type) { 431 case apiMeta: 432 api := md.(apiMeta) 433 key := api.descriptor + "_" + strconv.Itoa(api.apiType) 434 agent.apiCache.Remove(key) 435 break 436 case stringMeta: 437 agent.errorCache.Remove(md.(stringMeta).funcName) 438 break 439 case sqlMeta: 440 agent.sqlCache.Remove(md.(sqlMeta).sql) 441 break 442 case sqlUidMeta: 443 agent.sqlUidCache.Remove(md.(sqlUidMeta).sql) 444 break 445 case exceptionMeta: 446 break 447 } 448 } 449 450 func (agent *agent) tryEnqueueMeta(md interface{}) bool { 451 if !agent.enable { 452 return false 453 } 454 455 select { 456 case agent.metaChan <- md: 457 return true 458 default: 459 break 460 } 461 462 <-agent.metaChan 463 return false 464 } 465 466 func (agent *agent) cacheError(errorName string) int32 { 467 if !agent.enable { 468 return 0 469 } 470 471 if v, ok := agent.errorCache.Peek(errorName); ok { 472 return v.(int32) 473 } 474 475 id := atomic.AddInt32(&agent.errorIdGen, 1) 476 agent.errorCache.Add(errorName, id) 477 478 md := stringMeta{ 479 id: id, 480 funcName: errorName, 481 } 482 agent.tryEnqueueMeta(md) 483 484 Log("agent").Infof("cache error id: %d, %s", id, errorName) 485 return id 486 } 487 488 func (agent *agent) cacheSql(sql string) int32 { 489 if !agent.enable { 490 return 0 491 } 492 493 if v, ok := agent.sqlCache.Peek(sql); ok { 494 return v.(int32) 495 } 496 497 id := atomic.AddInt32(&agent.sqlIdGen, 1) 498 agent.sqlCache.Add(sql, id) 499 500 md := sqlMeta{ 501 id: id, 502 sql: sql, 503 } 504 agent.tryEnqueueMeta(md) 505 506 Log("agent").Infof("cache sql id: %d, %s", id, sql) 507 return id 508 } 509 510 func (agent *agent) cacheSqlUid(sql string) []byte { 511 if !agent.enable { 512 return nil 513 } 514 515 if v, ok := agent.sqlUidCache.Peek(sql); ok { 516 return v.([]byte) 517 } 518 519 hash := murmur3.New128() 520 hash.Write([]byte(sql)) 521 uid := hash.Sum(nil) 522 agent.sqlUidCache.Add(sql, uid) 523 524 md := sqlUidMeta{ 525 uid: uid, 526 sql: sql, 527 } 528 agent.tryEnqueueMeta(md) 529 530 Log("agent").Infof("cache sql uid: %#v, %s", uid, sql) 531 return uid 532 } 533 534 func (agent *agent) cacheSpanApi(descriptor string, apiType int) int32 { 535 if !agent.enable { 536 return 0 537 } 538 539 key := descriptor + "_" + strconv.Itoa(apiType) 540 541 if v, ok := agent.apiCache.Peek(key); ok { 542 return v.(int32) 543 } 544 545 id := atomic.AddInt32(&agent.apiIdGen, 1) 546 agent.apiCache.Add(key, id) 547 548 md := apiMeta{} 549 md.id = id 550 md.descriptor = descriptor 551 md.apiType = apiType 552 agent.tryEnqueueMeta(md) 553 554 Log("agent").Infof("cache api id: %d, %s", id, key) 555 return id 556 } 557 558 func (agent *agent) enqueueExceptionMeta(span *span) { 559 if !agent.enable || !agent.config.errorTraceCallStack { 560 return 561 } 562 563 md := exceptionMeta{ 564 txId: span.txId, 565 spanId: span.spanId, 566 exceptions: span.errorChains, 567 } 568 if span.urlStat != nil { 569 md.uriTemplate = span.urlStat.Url 570 } else { 571 md.uriTemplate = "NULL" 572 } 573 574 agent.tryEnqueueMeta(md) 575 Log("agent").Debugf("enqueue exception meta: %v", md) 576 } 577 578 func (agent *agent) enqueueUrlStat(stat *urlStat) bool { 579 if !agent.enable { 580 return false 581 } 582 583 select { 584 case agent.urlStatChan <- stat: 585 return true 586 default: 587 break 588 } 589 590 <-agent.urlStatChan 591 Log("agent").Tracef("url stat channel - max capacity reached or closed") 592 return false 593 } 594 595 func (agent *agent) collectUrlStatWorker() { 596 Log("agent").Infof("start collect uri stat goroutine") 597 defer agent.wg.Done() 598 599 agent.initUrlStat() 600 601 for uri := range agent.urlStatChan { 602 if !agent.enable { 603 break 604 } 605 snapshot := agent.currentUrlStatSnapshot() 606 snapshot.add(uri) 607 } 608 609 Log("agent").Infof("end collect uri stat goroutine") 610 } 611 612 func (agent *agent) sendUrlStatWorker() { 613 Log("agent").Infof("start send uri stat goroutine") 614 defer agent.wg.Done() 615 616 interval := 30 * time.Second 617 time.Sleep(interval) 618 619 for agent.enable { 620 if agent.config.collectUrlStat { 621 snapshot := agent.takeUrlStatSnapshot() 622 agent.enqueueStat(makePAgentUriStat(snapshot)) 623 } 624 time.Sleep(interval) 625 } 626 627 Log("agent").Infof("end send uri stat goroutine") 628 } 629 630 func (agent *agent) enqueueStat(stat *pb.PStatMessage) bool { 631 select { 632 case agent.statChan <- stat: 633 return true 634 default: 635 break 636 } 637 638 <-agent.statChan 639 return false 640 } 641 642 func (agent *agent) sendStatsWorker() { 643 Log("agent").Infof("start send stats goroutine") 644 defer agent.wg.Done() 645 646 stream := agent.statGrpc.newStatStreamWithRetry() 647 for stats := range agent.statChan { 648 if !agent.enable { 649 break 650 } 651 652 err := stream.sendStats(stats) 653 if err != nil { 654 if err != io.EOF { 655 Log("stats").Errorf("send stats - %v", err) 656 } 657 658 stream.close() 659 stream = agent.statGrpc.newStatStreamWithRetry() 660 } 661 } 662 stream.close() 663 664 Log("agent").Infof("end send stats goroutine") 665 } 666 667 func NewTestAgent(config *Config, t *testing.T) (Agent, error) { 668 config.offGrpc = true 669 logger.setup(config) 670 671 agent := &agent{ 672 appName: config.String(CfgAppName), 673 appType: int32(config.Int(CfgAppType)), 674 agentID: config.String(CfgAgentID), 675 agentName: config.String(CfgAgentName), 676 startTime: time.Now().UnixNano() / int64(time.Millisecond), 677 spanChan: make(chan *span, config.Int(CfgSpanQueueSize)), 678 metaChan: make(chan interface{}, config.Int(CfgSpanQueueSize)), 679 urlStatChan: make(chan *urlStat, config.Int(CfgSpanQueueSize)), 680 statChan: make(chan *pb.PStatMessage, config.Int(CfgSpanQueueSize)), 681 config: config, 682 } 683 agent.errorCache, _ = lru.New(cacheSize) 684 agent.sqlCache, _ = lru.New(cacheSize) 685 agent.sqlUidCache, _ = lru.New(cacheSize) 686 agent.apiCache, _ = lru.New(cacheSize) 687 agent.newSampler() 688 689 agent.agentGrpc = newMockAgentGrpc(agent, t) 690 //agent.spanGrpc = newMockSpanGrpc(agent, t) 691 //agent.statGrpc = newMockStatGrpc(agent, t) 692 693 globalAgent = agent 694 agent.enable = true 695 696 return agent, nil 697 }