github.com/waldiirawan/apm-agent-go/v2@v2.2.2/metrics_test.go (about) 1 // Licensed to Elasticsearch B.V. under one or more contributor 2 // license agreements. See the NOTICE file distributed with 3 // this work for additional information regarding copyright 4 // ownership. Elasticsearch B.V. licenses this file to you under 5 // the Apache License, Version 2.0 (the "License"); you may 6 // not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, 12 // software distributed under the License is distributed on an 13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 // KIND, either express or implied. See the License for the 15 // specific language governing permissions and limitations 16 // under the License. 17 18 package apm_test 19 20 import ( 21 "bytes" 22 "context" 23 "fmt" 24 "io" 25 "os" 26 "runtime" 27 "sort" 28 "strconv" 29 "strings" 30 "testing" 31 "text/tabwriter" 32 "time" 33 34 "github.com/stretchr/testify/assert" 35 "github.com/stretchr/testify/require" 36 37 "github.com/waldiirawan/apm-agent-go/v2" 38 "github.com/waldiirawan/apm-agent-go/v2/apmtest" 39 "github.com/waldiirawan/apm-agent-go/v2/model" 40 "github.com/waldiirawan/apm-agent-go/v2/transport/transporttest" 41 ) 42 43 func TestTracerMetricsBuiltin(t *testing.T) { 44 tracer, transport := transporttest.NewRecorderTracer() 45 defer tracer.Close() 46 47 busyWork(10 * time.Millisecond) 48 tracer.SendMetrics(nil) 49 50 payloads := transport.Payloads() 51 builtinMetrics := payloads.Metrics[0] 52 53 assert.Nil(t, builtinMetrics.Labels) 54 assert.NotEmpty(t, builtinMetrics.Timestamp) 55 56 gcPct := builtinMetrics.Samples["golang.heap.gc.cpu_fraction"] 57 if assert.NotNil(t, gcPct.Value) && runtime.GOOS == "linux" { 58 // NOTE(axw) on Windows and macOS, sometimes 59 // MemStats.GCCPUFraction is outside the expected 60 // range [0,1). We should isolate the issue and 61 // report it upstream. 62 assert.Condition(t, func() bool { 63 return gcPct.Value >= 0 && gcPct.Value <= 1 64 }, "value: %v", gcPct.Value) 65 } 66 67 // CPU% should be in the range [0,1], not [0,100]. 68 cpuTotalNormPct := builtinMetrics.Samples["system.cpu.total.norm.pct"] 69 if assert.NotNil(t, gcPct.Value) { 70 assert.Condition(t, func() bool { 71 return cpuTotalNormPct.Value >= 0 && cpuTotalNormPct.Value <= 1 72 }, "value: %v", cpuTotalNormPct.Value) 73 } 74 75 expected := []string{ 76 "golang.goroutines", 77 "golang.heap.allocations.mallocs", 78 "golang.heap.allocations.frees", 79 "golang.heap.allocations.objects", 80 "golang.heap.allocations.total", 81 "golang.heap.allocations.allocated", 82 "golang.heap.allocations.idle", 83 "golang.heap.allocations.active", 84 "golang.heap.system.total", 85 "golang.heap.system.obtained", 86 "golang.heap.system.stack", 87 "golang.heap.system.released", 88 "golang.heap.gc.next_gc_limit", 89 "golang.heap.gc.total_count", 90 "golang.heap.gc.total_pause.ns", 91 "golang.heap.gc.cpu_fraction", 92 93 "system.cpu.total.norm.pct", 94 "system.memory.total", 95 "system.memory.actual.free", 96 "system.process.cpu.total.norm.pct", 97 "system.process.memory.size", 98 "system.process.memory.rss.bytes", 99 } 100 sort.Strings(expected) 101 for name := range builtinMetrics.Samples { 102 assert.Contains(t, expected, name) 103 } 104 105 var buf bytes.Buffer 106 tw := tabwriter.NewWriter(&buf, 10, 4, 2, ' ', 0) 107 fmt.Fprintln(tw, "NAME\tVALUE") 108 for _, name := range expected { 109 assert.Contains(t, builtinMetrics.Samples, name) 110 metric := builtinMetrics.Samples[name] 111 fmt.Fprintf(tw, "%s\t%s\n", name, strconv.FormatFloat(metric.Value, 'f', -1, 64)) 112 } 113 tw.Flush() 114 t.Logf("\n\n%s\n", buf.String()) 115 } 116 117 func TestTracerMetricsInterval(t *testing.T) { 118 tracer, transport := transporttest.NewRecorderTracer() 119 defer tracer.Close() 120 121 interval := 1 * time.Second 122 tracer.SetMetricsInterval(interval) 123 before := time.Now() 124 deadline := before.Add(5 * time.Second) 125 for len(transport.Payloads().Metrics) == 0 { 126 if time.Now().After(deadline) { 127 t.Fatal("timed out waiting for metrics") 128 } 129 time.Sleep(time.Millisecond) 130 } 131 after := time.Now() 132 assert.WithinDuration(t, before.Add(interval), after, 200*time.Millisecond) 133 } 134 135 func TestTracerMetricsGatherer(t *testing.T) { 136 tracer, transport := transporttest.NewRecorderTracer() 137 defer tracer.Close() 138 139 tracer.RegisterMetricsGatherer(apm.GatherMetricsFunc( 140 func(ctx context.Context, m *apm.Metrics) error { 141 m.Add("http.request", []apm.MetricLabel{ 142 {Name: "code", Value: "400"}, 143 {Name: "path", Value: "/"}, 144 }, 3) 145 m.Add("http.request", []apm.MetricLabel{ 146 {Name: "code", Value: "200"}, 147 }, 4) 148 return nil 149 }, 150 )) 151 tracer.SendMetrics(nil) 152 153 payloads := transport.Payloads() 154 metrics1 := payloads.Metrics[1] 155 metrics2 := payloads.Metrics[2] 156 157 assert.Equal(t, model.StringMap{{Key: "code", Value: "200"}}, metrics1.Labels) 158 assert.Equal(t, map[string]model.Metric{"http.request": {Value: 4}}, metrics1.Samples) 159 160 assert.Equal(t, model.StringMap{ 161 {Key: "code", Value: "400"}, 162 {Key: "path", Value: "/"}, 163 }, metrics2.Labels) 164 assert.Equal(t, map[string]model.Metric{"http.request": {Value: 3}}, metrics2.Samples) 165 } 166 167 func TestTracerMetricsDeregister(t *testing.T) { 168 tracer, transport := transporttest.NewRecorderTracer() 169 defer tracer.Close() 170 171 g := apm.GatherMetricsFunc( 172 func(ctx context.Context, m *apm.Metrics) error { 173 m.Add("with_labels", []apm.MetricLabel{ 174 {Name: "code", Value: "200"}, 175 }, 4) 176 return nil 177 }, 178 ) 179 deregister := tracer.RegisterMetricsGatherer(g) 180 deregister() 181 deregister() // safe to call multiple times 182 tracer.SendMetrics(nil) 183 184 payloads := transport.Payloads() 185 metrics := payloads.Metrics 186 require.Len(t, metrics, 1) // just the builtin/unlabeled metrics 187 } 188 189 func TestTracerMetricsBusyTracer(t *testing.T) { 190 os.Setenv("ELASTIC_APM_API_BUFFER_SIZE", "10KB") 191 defer os.Unsetenv("ELASTIC_APM_API_BUFFER_SIZE") 192 193 var recorder transporttest.RecorderTransport 194 firstRequestDone := make(chan struct{}) 195 transport := sendStreamFunc(func(ctx context.Context, r io.Reader) error { 196 if firstRequestDone != nil { 197 firstRequestDone <- struct{}{} 198 firstRequestDone = nil 199 return nil 200 } 201 return recorder.SendStream(ctx, r) 202 }) 203 204 tracer, err := apm.NewTracerOptions(apm.TracerOptions{ 205 ServiceName: "transporttest", 206 Transport: transport, 207 }) 208 require.NoError(t, err) 209 defer tracer.Close() 210 211 // Force a complete request to be flushed, preventing metrics from 212 // being added to the request buffer until we unblock the transport. 213 nonblocking := make(chan struct{}) 214 close(nonblocking) 215 tracer.StartTransaction("name", "type").End() 216 tracer.Flush(nonblocking) 217 218 const interval = 100 * time.Millisecond 219 tracer.SetMetricsInterval(interval) 220 for i := 0; i < 5; i++ { 221 time.Sleep(interval) 222 } 223 for i := 0; i < 1000; i++ { 224 tx := tracer.StartTransaction( 225 strings.Repeat("x", 1024), 226 strings.Repeat("y", 1024), 227 ) 228 tx.Context.SetLabel(strings.Repeat("a", 7000), "v") 229 tx.End() 230 } 231 232 <-firstRequestDone 233 tracer.Flush(nil) // wait for possibly-latent flush 234 tracer.Flush(nil) // wait for buffered events to be flushed 235 236 assert.NotZero(t, recorder.Payloads().Metrics) 237 } 238 239 func TestTracerMetricsBuffered(t *testing.T) { 240 var recorder transporttest.RecorderTransport 241 unblock := make(chan struct{}) 242 tracer, _ := apm.NewTracerOptions(apm.TracerOptions{ 243 Transport: sendStreamFunc(func(ctx context.Context, r io.Reader) error { 244 select { 245 case <-ctx.Done(): 246 return ctx.Err() 247 case <-unblock: 248 return recorder.SendStream(ctx, r) 249 } 250 }), 251 }) 252 defer tracer.Close() 253 254 gathered := make(chan struct{}) 255 tracer.RegisterMetricsGatherer(apm.GatherMetricsFunc( 256 func(ctx context.Context, m *apm.Metrics) error { 257 select { 258 case <-ctx.Done(): 259 return ctx.Err() 260 case gathered <- struct{}{}: 261 } 262 return nil 263 }, 264 )) 265 tracer.SetMetricsInterval(10 * time.Millisecond) 266 267 // Wait for metrics to be gathered several times, and then unblock 268 // the transport and check that the metrics were buffered while 269 // the transport was blocked. 270 timeout := time.After(5 * time.Second) 271 const N = 5 272 for i := 0; i < N+1; i++ { 273 select { 274 case <-timeout: 275 t.Fatal("timed out waiting for metrics gatherer to be called") 276 case <-gathered: 277 } 278 } 279 unblock <- struct{}{} 280 tracer.Flush(nil) // wait for buffered metrics to be flushed 281 282 metrics := recorder.Payloads().Metrics 283 if assert.Conditionf(t, func() bool { return len(metrics) >= N }, "len(metrics): %d", len(metrics)) { 284 for i, m := range metrics[1:] { 285 assert.NotEqual(t, metrics[i].Timestamp, m.Timestamp) 286 } 287 } 288 } 289 290 func TestTracerDisableMetrics(t *testing.T) { 291 os.Setenv("ELASTIC_APM_DISABLE_METRICS", "golang.heap.*, system.memory.*, system.process.*") 292 defer os.Unsetenv("ELASTIC_APM_DISABLE_METRICS") 293 294 tracer, transport := transporttest.NewRecorderTracer() 295 defer tracer.Close() 296 297 tracer.SendMetrics(nil) 298 299 payloads := transport.Payloads() 300 builtinMetrics := payloads.Metrics[0] 301 302 expected := []string{"golang.goroutines", "system.cpu.total.norm.pct"} 303 var actual []string 304 for name := range builtinMetrics.Samples { 305 actual = append(actual, name) 306 } 307 sort.Strings(actual) 308 assert.EqualValues(t, expected, actual) 309 } 310 311 func TestTracerMetricsNotRecording(t *testing.T) { 312 tracer := apmtest.NewRecordingTracer() 313 defer tracer.Close() 314 tracer.SetRecording(false) 315 testTracerMetricsNotRecording(t, tracer) 316 } 317 318 func testTracerMetricsNotRecording(t *testing.T, tracer *apmtest.RecordingTracer) { 319 done := make(chan struct{}) 320 defer close(done) 321 322 gathered := make(chan struct{}) 323 tracer.RegisterMetricsGatherer(apm.GatherMetricsFunc( 324 func(ctx context.Context, m *apm.Metrics) error { 325 select { 326 case <-ctx.Done(): 327 return ctx.Err() 328 case <-done: 329 case gathered <- struct{}{}: 330 } 331 return nil 332 }, 333 )) 334 335 tracer.SetMetricsInterval(time.Millisecond) 336 defer tracer.SetMetricsInterval(0) // disable at end 337 338 sent := make(chan struct{}) 339 go func() { 340 defer close(sent) 341 tracer.SendMetrics(nil) // unblocked by tracer.Close 342 }() 343 344 // Because the tracer is configured to not record, 345 // the metrics gatherer should never be called. 346 select { 347 case <-time.After(100 * time.Millisecond): 348 case <-sent: 349 t.Fatal("expected SendMetrics to block") 350 case <-gathered: 351 t.Fatal("unexpected metrics gatherer call") 352 } 353 354 tracer.Flush(nil) // empty queue, should not block 355 payloads := tracer.Payloads() 356 require.Empty(t, payloads.Metrics) 357 } 358 359 // busyWork does meaningless work for the specified duration, 360 // so we can observe CPU usage. 361 func busyWork(d time.Duration) int { 362 var n int 363 afterCh := time.After(d) 364 for { 365 select { 366 case <-afterCh: 367 return n 368 default: 369 n++ 370 } 371 } 372 } 373 374 type sendStreamFunc func(context.Context, io.Reader) error 375 376 func (f sendStreamFunc) SendStream(ctx context.Context, r io.Reader) error { 377 return f(ctx, r) 378 }