git.colasdn.top/newrelic/go-agent@v3.26.0+incompatible/internal_app.go (about) 1 // Copyright 2020 New Relic Corporation. All rights reserved. 2 // SPDX-License-Identifier: Apache-2.0 3 4 package newrelic 5 6 import ( 7 "errors" 8 "fmt" 9 "io" 10 "math" 11 "net/http" 12 "os" 13 "strings" 14 "sync" 15 "time" 16 17 "github.com/newrelic/go-agent/internal" 18 "github.com/newrelic/go-agent/internal/logger" 19 ) 20 21 type dataConsumer interface { 22 Consume(internal.AgentRunID, internal.Harvestable) 23 } 24 25 type appData struct { 26 id internal.AgentRunID 27 data internal.Harvestable 28 } 29 30 type app struct { 31 Logger 32 config Config 33 rpmControls internal.RpmControls 34 testHarvest *internal.Harvest 35 36 // placeholderRun is used when the application is not connected. 37 placeholderRun *appRun 38 39 // initiateShutdown is used to tell the processor to shutdown. 40 initiateShutdown chan struct{} 41 42 // shutdownStarted and shutdownComplete are closed by the processor 43 // goroutine to indicate the shutdown status. Two channels are used so 44 // that the call of app.Shutdown() can block until shutdown has 45 // completed but other goroutines can exit when shutdown has started. 46 // This is not just an optimization: This prevents a deadlock if 47 // harvesting data during the shutdown fails and an attempt is made to 48 // merge the data into the next harvest. 49 shutdownStarted chan struct{} 50 shutdownComplete chan struct{} 51 52 // Sends to these channels should not occur without a <-shutdownStarted 53 // select option to prevent deadlock. 54 dataChan chan appData 55 collectorErrorChan chan internal.RPMResponse 56 connectChan chan *appRun 57 58 // This mutex protects both `run` and `err`, both of which should only 59 // be accessed using getState and setState. 60 sync.RWMutex 61 // run is non-nil when the app is successfully connected. It is 62 // immutable. 63 run *appRun 64 // err is non-nil if the application will never be connected again 65 // (disconnect, license exception, shutdown). 66 err error 67 68 serverless *internal.ServerlessHarvest 69 } 70 71 func (app *app) doHarvest(h *internal.Harvest, harvestStart time.Time, run *appRun) { 72 h.CreateFinalMetrics(run.Reply, run) 73 74 payloads := h.Payloads(app.config.DistributedTracer.Enabled) 75 for _, p := range payloads { 76 cmd := p.EndpointMethod() 77 data, err := p.Data(run.Reply.RunID.String(), harvestStart) 78 79 if nil != err { 80 app.Warn("unable to create harvest data", map[string]interface{}{ 81 "cmd": cmd, 82 "error": err.Error(), 83 }) 84 continue 85 } 86 if nil == data { 87 continue 88 } 89 90 call := internal.RpmCmd{ 91 Collector: run.Reply.Collector, 92 RunID: run.Reply.RunID.String(), 93 Name: cmd, 94 Data: data, 95 RequestHeadersMap: run.Reply.RequestHeadersMap, 96 MaxPayloadSize: run.Reply.MaxPayloadSizeInBytes, 97 } 98 99 resp := internal.CollectorRequest(call, app.rpmControls) 100 101 if resp.IsDisconnect() || resp.IsRestartException() { 102 select { 103 case app.collectorErrorChan <- resp: 104 case <-app.shutdownStarted: 105 } 106 return 107 } 108 109 if nil != resp.Err { 110 app.Warn("harvest failure", map[string]interface{}{ 111 "cmd": cmd, 112 "error": resp.Err.Error(), 113 "retain_data": resp.ShouldSaveHarvestData(), 114 }) 115 } 116 117 if resp.ShouldSaveHarvestData() { 118 app.Consume(run.Reply.RunID, p) 119 } 120 } 121 } 122 123 func (app *app) connectRoutine() { 124 connectAttempt := 0 125 for { 126 reply, resp := internal.ConnectAttempt(config{app.config}, 127 app.config.SecurityPoliciesToken, app.config.HighSecurity, app.rpmControls) 128 129 if reply != nil { 130 select { 131 case app.connectChan <- newAppRun(app.config, reply): 132 case <-app.shutdownStarted: 133 } 134 return 135 } 136 137 if resp.IsDisconnect() { 138 select { 139 case app.collectorErrorChan <- resp: 140 case <-app.shutdownStarted: 141 } 142 return 143 } 144 145 if nil != resp.Err { 146 app.Warn("application connect failure", map[string]interface{}{ 147 "error": resp.Err.Error(), 148 }) 149 } 150 151 backoff := getConnectBackoffTime(connectAttempt) 152 time.Sleep(time.Duration(backoff) * time.Second) 153 connectAttempt++ 154 } 155 } 156 157 // Connect backoff time follows the sequence defined at 158 // https://source.datanerd.us/agents/agent-specs/blob/master/Collector-Response-Handling.md#retries-and-backoffs 159 func getConnectBackoffTime(attempt int) int { 160 connectBackoffTimes := [...]int{15, 15, 30, 60, 120, 300} 161 l := len(connectBackoffTimes) 162 if (attempt < 0) || (attempt >= l) { 163 return connectBackoffTimes[l-1] 164 } 165 return connectBackoffTimes[attempt] 166 } 167 168 func processConnectMessages(run *appRun, lg Logger) { 169 for _, msg := range run.Reply.Messages { 170 event := "collector message" 171 cn := map[string]interface{}{"msg": msg.Message} 172 173 switch strings.ToLower(msg.Level) { 174 case "error": 175 lg.Error(event, cn) 176 case "warn": 177 lg.Warn(event, cn) 178 case "info": 179 lg.Info(event, cn) 180 case "debug", "verbose": 181 lg.Debug(event, cn) 182 } 183 } 184 } 185 186 func (app *app) process() { 187 // Both the harvest and the run are non-nil when the app is connected, 188 // and nil otherwise. 189 var h *internal.Harvest 190 var run *appRun 191 192 harvestTicker := time.NewTicker(time.Second) 193 defer harvestTicker.Stop() 194 195 for { 196 select { 197 case <-harvestTicker.C: 198 if nil != run { 199 now := time.Now() 200 if ready := h.Ready(now); nil != ready { 201 go app.doHarvest(ready, now, run) 202 } 203 } 204 case d := <-app.dataChan: 205 if nil != run && run.Reply.RunID == d.id { 206 d.data.MergeIntoHarvest(h) 207 } 208 case <-app.initiateShutdown: 209 close(app.shutdownStarted) 210 211 // Remove the run before merging any final data to 212 // ensure a bounded number of receives from dataChan. 213 app.setState(nil, errors.New("application shut down")) 214 215 if nil != run { 216 for done := false; !done; { 217 select { 218 case d := <-app.dataChan: 219 if run.Reply.RunID == d.id { 220 d.data.MergeIntoHarvest(h) 221 } 222 default: 223 done = true 224 } 225 } 226 app.doHarvest(h, time.Now(), run) 227 } 228 229 close(app.shutdownComplete) 230 return 231 case resp := <-app.collectorErrorChan: 232 run = nil 233 h = nil 234 app.setState(nil, nil) 235 236 if resp.IsDisconnect() { 237 app.setState(nil, resp.Err) 238 app.Error("application disconnected", map[string]interface{}{ 239 "app": app.config.AppName, 240 }) 241 } else if resp.IsRestartException() { 242 app.Info("application restarted", map[string]interface{}{ 243 "app": app.config.AppName, 244 }) 245 go app.connectRoutine() 246 } 247 case run = <-app.connectChan: 248 h = internal.NewHarvest(time.Now(), run) 249 app.setState(run, nil) 250 251 app.Info("application connected", map[string]interface{}{ 252 "app": app.config.AppName, 253 "run": run.Reply.RunID.String(), 254 }) 255 processConnectMessages(run, app) 256 } 257 } 258 } 259 260 func (app *app) Shutdown(timeout time.Duration) { 261 if !app.config.Enabled { 262 return 263 } 264 if app.config.ServerlessMode.Enabled { 265 return 266 } 267 268 select { 269 case app.initiateShutdown <- struct{}{}: 270 default: 271 } 272 273 // Block until shutdown is done or timeout occurs. 274 t := time.NewTimer(timeout) 275 select { 276 case <-app.shutdownComplete: 277 case <-t.C: 278 } 279 t.Stop() 280 281 app.Info("application shutdown", map[string]interface{}{ 282 "app": app.config.AppName, 283 }) 284 } 285 286 func runSampler(app *app, period time.Duration) { 287 previous := internal.GetSample(time.Now(), app) 288 t := time.NewTicker(period) 289 for { 290 select { 291 case now := <-t.C: 292 current := internal.GetSample(now, app) 293 run, _ := app.getState() 294 app.Consume(run.Reply.RunID, internal.GetStats(internal.Samples{ 295 Previous: previous, 296 Current: current, 297 })) 298 previous = current 299 case <-app.shutdownStarted: 300 t.Stop() 301 return 302 } 303 } 304 } 305 306 func (app *app) WaitForConnection(timeout time.Duration) error { 307 if !app.config.Enabled { 308 return nil 309 } 310 if app.config.ServerlessMode.Enabled { 311 return nil 312 } 313 deadline := time.Now().Add(timeout) 314 pollPeriod := 50 * time.Millisecond 315 316 for { 317 run, err := app.getState() 318 if nil != err { 319 return err 320 } 321 if run.Reply.RunID != "" { 322 return nil 323 } 324 if time.Now().After(deadline) { 325 return fmt.Errorf("timeout out after %s", timeout.String()) 326 } 327 time.Sleep(pollPeriod) 328 } 329 } 330 331 func newApp(c Config) (Application, error) { 332 c = copyConfigReferenceFields(c) 333 if err := c.Validate(); nil != err { 334 return nil, err 335 } 336 if nil == c.Logger { 337 c.Logger = logger.ShimLogger{} 338 } 339 app := &app{ 340 Logger: c.Logger, 341 config: c, 342 placeholderRun: newAppRun(c, internal.ConnectReplyDefaults()), 343 344 // This channel must be buffered since Shutdown makes a 345 // non-blocking send attempt. 346 initiateShutdown: make(chan struct{}, 1), 347 348 shutdownStarted: make(chan struct{}), 349 shutdownComplete: make(chan struct{}), 350 connectChan: make(chan *appRun, 1), 351 collectorErrorChan: make(chan internal.RPMResponse, 1), 352 dataChan: make(chan appData, internal.AppDataChanSize), 353 rpmControls: internal.RpmControls{ 354 License: c.License, 355 Client: &http.Client{ 356 Transport: c.Transport, 357 Timeout: internal.CollectorTimeout, 358 }, 359 Logger: c.Logger, 360 AgentVersion: Version, 361 }, 362 } 363 364 app.Info("application created", map[string]interface{}{ 365 "app": app.config.AppName, 366 "version": Version, 367 "enabled": app.config.Enabled, 368 }) 369 370 if app.config.Enabled { 371 if app.config.ServerlessMode.Enabled { 372 reply := newServerlessConnectReply(c) 373 app.run = newAppRun(c, reply) 374 app.serverless = internal.NewServerlessHarvest(c.Logger, Version, os.Getenv) 375 } else { 376 go app.process() 377 go app.connectRoutine() 378 if app.config.RuntimeSampler.Enabled { 379 go runSampler(app, internal.RuntimeSamplerPeriod) 380 } 381 } 382 } 383 384 return app, nil 385 } 386 387 var ( 388 _ internal.HarvestTestinger = &app{} 389 _ internal.Expect = &app{} 390 ) 391 392 func (app *app) HarvestTesting(replyfn func(*internal.ConnectReply)) { 393 if nil != replyfn { 394 reply := internal.ConnectReplyDefaults() 395 replyfn(reply) 396 app.placeholderRun = newAppRun(app.config, reply) 397 } 398 app.testHarvest = internal.NewHarvest(time.Now(), &internal.DfltHarvestCfgr{}) 399 } 400 401 func (app *app) getState() (*appRun, error) { 402 app.RLock() 403 defer app.RUnlock() 404 405 run := app.run 406 if nil == run { 407 run = app.placeholderRun 408 } 409 return run, app.err 410 } 411 412 func (app *app) setState(run *appRun, err error) { 413 app.Lock() 414 defer app.Unlock() 415 416 app.run = run 417 app.err = err 418 } 419 420 // StartTransaction implements newrelic.Application's StartTransaction. 421 func (app *app) StartTransaction(name string, w http.ResponseWriter, r *http.Request) Transaction { 422 run, _ := app.getState() 423 txn := upgradeTxn(newTxn(txnInput{ 424 app: app, 425 appRun: run, 426 writer: w, 427 Consumer: app, 428 }, name)) 429 430 if nil != r { 431 txn.SetWebRequest(NewWebRequest(r)) 432 } 433 return txn 434 } 435 436 var ( 437 errHighSecurityEnabled = errors.New("high security enabled") 438 errCustomEventsDisabled = errors.New("custom events disabled") 439 errCustomEventsRemoteDisabled = errors.New("custom events disabled by server") 440 ) 441 442 // RecordCustomEvent implements newrelic.Application's RecordCustomEvent. 443 func (app *app) RecordCustomEvent(eventType string, params map[string]interface{}) error { 444 if app.config.HighSecurity { 445 return errHighSecurityEnabled 446 } 447 448 if !app.config.CustomInsightsEvents.Enabled { 449 return errCustomEventsDisabled 450 } 451 452 event, e := internal.CreateCustomEvent(eventType, params, time.Now()) 453 if nil != e { 454 return e 455 } 456 457 run, _ := app.getState() 458 if !run.Reply.CollectCustomEvents { 459 return errCustomEventsRemoteDisabled 460 } 461 462 if !run.Reply.SecurityPolicies.CustomEvents.Enabled() { 463 return errSecurityPolicy 464 } 465 466 app.Consume(run.Reply.RunID, event) 467 468 return nil 469 } 470 471 var ( 472 errMetricInf = errors.New("invalid metric value: inf") 473 errMetricNaN = errors.New("invalid metric value: NaN") 474 errMetricNameEmpty = errors.New("missing metric name") 475 errMetricServerless = errors.New("custom metrics are not currently supported in serverless mode") 476 ) 477 478 // RecordCustomMetric implements newrelic.Application's RecordCustomMetric. 479 func (app *app) RecordCustomMetric(name string, value float64) error { 480 if app.config.ServerlessMode.Enabled { 481 return errMetricServerless 482 } 483 if math.IsNaN(value) { 484 return errMetricNaN 485 } 486 if math.IsInf(value, 0) { 487 return errMetricInf 488 } 489 if "" == name { 490 return errMetricNameEmpty 491 } 492 run, _ := app.getState() 493 app.Consume(run.Reply.RunID, internal.CustomMetric{ 494 RawInputName: name, 495 Value: value, 496 }) 497 return nil 498 } 499 500 var ( 501 _ internal.ServerlessWriter = &app{} 502 ) 503 504 func (app *app) ServerlessWrite(arn string, writer io.Writer) { 505 app.serverless.Write(arn, writer) 506 } 507 508 func (app *app) Consume(id internal.AgentRunID, data internal.Harvestable) { 509 510 app.serverless.Consume(data) 511 512 if nil != app.testHarvest { 513 data.MergeIntoHarvest(app.testHarvest) 514 return 515 } 516 517 if "" == id { 518 return 519 } 520 521 select { 522 case app.dataChan <- appData{id, data}: 523 case <-app.shutdownStarted: 524 } 525 } 526 527 func (app *app) ExpectCustomEvents(t internal.Validator, want []internal.WantEvent) { 528 internal.ExpectCustomEvents(internal.ExtendValidator(t, "custom events"), app.testHarvest.CustomEvents, want) 529 } 530 531 func (app *app) ExpectErrors(t internal.Validator, want []internal.WantError) { 532 t = internal.ExtendValidator(t, "traced errors") 533 internal.ExpectErrors(t, app.testHarvest.ErrorTraces, want) 534 } 535 536 func (app *app) ExpectErrorEvents(t internal.Validator, want []internal.WantEvent) { 537 t = internal.ExtendValidator(t, "error events") 538 internal.ExpectErrorEvents(t, app.testHarvest.ErrorEvents, want) 539 } 540 541 func (app *app) ExpectSpanEvents(t internal.Validator, want []internal.WantEvent) { 542 t = internal.ExtendValidator(t, "spans events") 543 internal.ExpectSpanEvents(t, app.testHarvest.SpanEvents, want) 544 } 545 546 func (app *app) ExpectTxnEvents(t internal.Validator, want []internal.WantEvent) { 547 t = internal.ExtendValidator(t, "txn events") 548 internal.ExpectTxnEvents(t, app.testHarvest.TxnEvents, want) 549 } 550 551 func (app *app) ExpectMetrics(t internal.Validator, want []internal.WantMetric) { 552 t = internal.ExtendValidator(t, "metrics") 553 internal.ExpectMetrics(t, app.testHarvest.Metrics, want) 554 } 555 556 func (app *app) ExpectMetricsPresent(t internal.Validator, want []internal.WantMetric) { 557 t = internal.ExtendValidator(t, "metrics") 558 internal.ExpectMetricsPresent(t, app.testHarvest.Metrics, want) 559 } 560 561 func (app *app) ExpectTxnMetrics(t internal.Validator, want internal.WantTxn) { 562 t = internal.ExtendValidator(t, "metrics") 563 internal.ExpectTxnMetrics(t, app.testHarvest.Metrics, want) 564 } 565 566 func (app *app) ExpectTxnTraces(t internal.Validator, want []internal.WantTxnTrace) { 567 t = internal.ExtendValidator(t, "txn traces") 568 internal.ExpectTxnTraces(t, app.testHarvest.TxnTraces, want) 569 } 570 571 func (app *app) ExpectSlowQueries(t internal.Validator, want []internal.WantSlowQuery) { 572 t = internal.ExtendValidator(t, "slow queries") 573 internal.ExpectSlowQueries(t, app.testHarvest.SlowSQLs, want) 574 }