github.com/waldiirawan/apm-agent-go/v2@v2.2.2/metrics_test.go (about)

     1  // Licensed to Elasticsearch B.V. under one or more contributor
     2  // license agreements. See the NOTICE file distributed with
     3  // this work for additional information regarding copyright
     4  // ownership. Elasticsearch B.V. licenses this file to you under
     5  // the Apache License, Version 2.0 (the "License"); you may
     6  // not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing,
    12  // software distributed under the License is distributed on an
    13  // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    14  // KIND, either express or implied.  See the License for the
    15  // specific language governing permissions and limitations
    16  // under the License.
    17  
    18  package apm_test
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"fmt"
    24  	"io"
    25  	"os"
    26  	"runtime"
    27  	"sort"
    28  	"strconv"
    29  	"strings"
    30  	"testing"
    31  	"text/tabwriter"
    32  	"time"
    33  
    34  	"github.com/stretchr/testify/assert"
    35  	"github.com/stretchr/testify/require"
    36  
    37  	"github.com/waldiirawan/apm-agent-go/v2"
    38  	"github.com/waldiirawan/apm-agent-go/v2/apmtest"
    39  	"github.com/waldiirawan/apm-agent-go/v2/model"
    40  	"github.com/waldiirawan/apm-agent-go/v2/transport/transporttest"
    41  )
    42  
    43  func TestTracerMetricsBuiltin(t *testing.T) {
    44  	tracer, transport := transporttest.NewRecorderTracer()
    45  	defer tracer.Close()
    46  
    47  	busyWork(10 * time.Millisecond)
    48  	tracer.SendMetrics(nil)
    49  
    50  	payloads := transport.Payloads()
    51  	builtinMetrics := payloads.Metrics[0]
    52  
    53  	assert.Nil(t, builtinMetrics.Labels)
    54  	assert.NotEmpty(t, builtinMetrics.Timestamp)
    55  
    56  	gcPct := builtinMetrics.Samples["golang.heap.gc.cpu_fraction"]
    57  	if assert.NotNil(t, gcPct.Value) && runtime.GOOS == "linux" {
    58  		// NOTE(axw) on Windows and macOS, sometimes
    59  		// MemStats.GCCPUFraction is outside the expected
    60  		// range [0,1). We should isolate the issue and
    61  		// report it upstream.
    62  		assert.Condition(t, func() bool {
    63  			return gcPct.Value >= 0 && gcPct.Value <= 1
    64  		}, "value: %v", gcPct.Value)
    65  	}
    66  
    67  	// CPU% should be in the range [0,1], not [0,100].
    68  	cpuTotalNormPct := builtinMetrics.Samples["system.cpu.total.norm.pct"]
    69  	if assert.NotNil(t, gcPct.Value) {
    70  		assert.Condition(t, func() bool {
    71  			return cpuTotalNormPct.Value >= 0 && cpuTotalNormPct.Value <= 1
    72  		}, "value: %v", cpuTotalNormPct.Value)
    73  	}
    74  
    75  	expected := []string{
    76  		"golang.goroutines",
    77  		"golang.heap.allocations.mallocs",
    78  		"golang.heap.allocations.frees",
    79  		"golang.heap.allocations.objects",
    80  		"golang.heap.allocations.total",
    81  		"golang.heap.allocations.allocated",
    82  		"golang.heap.allocations.idle",
    83  		"golang.heap.allocations.active",
    84  		"golang.heap.system.total",
    85  		"golang.heap.system.obtained",
    86  		"golang.heap.system.stack",
    87  		"golang.heap.system.released",
    88  		"golang.heap.gc.next_gc_limit",
    89  		"golang.heap.gc.total_count",
    90  		"golang.heap.gc.total_pause.ns",
    91  		"golang.heap.gc.cpu_fraction",
    92  
    93  		"system.cpu.total.norm.pct",
    94  		"system.memory.total",
    95  		"system.memory.actual.free",
    96  		"system.process.cpu.total.norm.pct",
    97  		"system.process.memory.size",
    98  		"system.process.memory.rss.bytes",
    99  	}
   100  	sort.Strings(expected)
   101  	for name := range builtinMetrics.Samples {
   102  		assert.Contains(t, expected, name)
   103  	}
   104  
   105  	var buf bytes.Buffer
   106  	tw := tabwriter.NewWriter(&buf, 10, 4, 2, ' ', 0)
   107  	fmt.Fprintln(tw, "NAME\tVALUE")
   108  	for _, name := range expected {
   109  		assert.Contains(t, builtinMetrics.Samples, name)
   110  		metric := builtinMetrics.Samples[name]
   111  		fmt.Fprintf(tw, "%s\t%s\n", name, strconv.FormatFloat(metric.Value, 'f', -1, 64))
   112  	}
   113  	tw.Flush()
   114  	t.Logf("\n\n%s\n", buf.String())
   115  }
   116  
   117  func TestTracerMetricsInterval(t *testing.T) {
   118  	tracer, transport := transporttest.NewRecorderTracer()
   119  	defer tracer.Close()
   120  
   121  	interval := 1 * time.Second
   122  	tracer.SetMetricsInterval(interval)
   123  	before := time.Now()
   124  	deadline := before.Add(5 * time.Second)
   125  	for len(transport.Payloads().Metrics) == 0 {
   126  		if time.Now().After(deadline) {
   127  			t.Fatal("timed out waiting for metrics")
   128  		}
   129  		time.Sleep(time.Millisecond)
   130  	}
   131  	after := time.Now()
   132  	assert.WithinDuration(t, before.Add(interval), after, 200*time.Millisecond)
   133  }
   134  
   135  func TestTracerMetricsGatherer(t *testing.T) {
   136  	tracer, transport := transporttest.NewRecorderTracer()
   137  	defer tracer.Close()
   138  
   139  	tracer.RegisterMetricsGatherer(apm.GatherMetricsFunc(
   140  		func(ctx context.Context, m *apm.Metrics) error {
   141  			m.Add("http.request", []apm.MetricLabel{
   142  				{Name: "code", Value: "400"},
   143  				{Name: "path", Value: "/"},
   144  			}, 3)
   145  			m.Add("http.request", []apm.MetricLabel{
   146  				{Name: "code", Value: "200"},
   147  			}, 4)
   148  			return nil
   149  		},
   150  	))
   151  	tracer.SendMetrics(nil)
   152  
   153  	payloads := transport.Payloads()
   154  	metrics1 := payloads.Metrics[1]
   155  	metrics2 := payloads.Metrics[2]
   156  
   157  	assert.Equal(t, model.StringMap{{Key: "code", Value: "200"}}, metrics1.Labels)
   158  	assert.Equal(t, map[string]model.Metric{"http.request": {Value: 4}}, metrics1.Samples)
   159  
   160  	assert.Equal(t, model.StringMap{
   161  		{Key: "code", Value: "400"},
   162  		{Key: "path", Value: "/"},
   163  	}, metrics2.Labels)
   164  	assert.Equal(t, map[string]model.Metric{"http.request": {Value: 3}}, metrics2.Samples)
   165  }
   166  
   167  func TestTracerMetricsDeregister(t *testing.T) {
   168  	tracer, transport := transporttest.NewRecorderTracer()
   169  	defer tracer.Close()
   170  
   171  	g := apm.GatherMetricsFunc(
   172  		func(ctx context.Context, m *apm.Metrics) error {
   173  			m.Add("with_labels", []apm.MetricLabel{
   174  				{Name: "code", Value: "200"},
   175  			}, 4)
   176  			return nil
   177  		},
   178  	)
   179  	deregister := tracer.RegisterMetricsGatherer(g)
   180  	deregister()
   181  	deregister() // safe to call multiple times
   182  	tracer.SendMetrics(nil)
   183  
   184  	payloads := transport.Payloads()
   185  	metrics := payloads.Metrics
   186  	require.Len(t, metrics, 1) // just the builtin/unlabeled metrics
   187  }
   188  
   189  func TestTracerMetricsBusyTracer(t *testing.T) {
   190  	os.Setenv("ELASTIC_APM_API_BUFFER_SIZE", "10KB")
   191  	defer os.Unsetenv("ELASTIC_APM_API_BUFFER_SIZE")
   192  
   193  	var recorder transporttest.RecorderTransport
   194  	firstRequestDone := make(chan struct{})
   195  	transport := sendStreamFunc(func(ctx context.Context, r io.Reader) error {
   196  		if firstRequestDone != nil {
   197  			firstRequestDone <- struct{}{}
   198  			firstRequestDone = nil
   199  			return nil
   200  		}
   201  		return recorder.SendStream(ctx, r)
   202  	})
   203  
   204  	tracer, err := apm.NewTracerOptions(apm.TracerOptions{
   205  		ServiceName: "transporttest",
   206  		Transport:   transport,
   207  	})
   208  	require.NoError(t, err)
   209  	defer tracer.Close()
   210  
   211  	// Force a complete request to be flushed, preventing metrics from
   212  	// being added to the request buffer until we unblock the transport.
   213  	nonblocking := make(chan struct{})
   214  	close(nonblocking)
   215  	tracer.StartTransaction("name", "type").End()
   216  	tracer.Flush(nonblocking)
   217  
   218  	const interval = 100 * time.Millisecond
   219  	tracer.SetMetricsInterval(interval)
   220  	for i := 0; i < 5; i++ {
   221  		time.Sleep(interval)
   222  	}
   223  	for i := 0; i < 1000; i++ {
   224  		tx := tracer.StartTransaction(
   225  			strings.Repeat("x", 1024),
   226  			strings.Repeat("y", 1024),
   227  		)
   228  		tx.Context.SetLabel(strings.Repeat("a", 7000), "v")
   229  		tx.End()
   230  	}
   231  
   232  	<-firstRequestDone
   233  	tracer.Flush(nil) // wait for possibly-latent flush
   234  	tracer.Flush(nil) // wait for buffered events to be flushed
   235  
   236  	assert.NotZero(t, recorder.Payloads().Metrics)
   237  }
   238  
   239  func TestTracerMetricsBuffered(t *testing.T) {
   240  	var recorder transporttest.RecorderTransport
   241  	unblock := make(chan struct{})
   242  	tracer, _ := apm.NewTracerOptions(apm.TracerOptions{
   243  		Transport: sendStreamFunc(func(ctx context.Context, r io.Reader) error {
   244  			select {
   245  			case <-ctx.Done():
   246  				return ctx.Err()
   247  			case <-unblock:
   248  				return recorder.SendStream(ctx, r)
   249  			}
   250  		}),
   251  	})
   252  	defer tracer.Close()
   253  
   254  	gathered := make(chan struct{})
   255  	tracer.RegisterMetricsGatherer(apm.GatherMetricsFunc(
   256  		func(ctx context.Context, m *apm.Metrics) error {
   257  			select {
   258  			case <-ctx.Done():
   259  				return ctx.Err()
   260  			case gathered <- struct{}{}:
   261  			}
   262  			return nil
   263  		},
   264  	))
   265  	tracer.SetMetricsInterval(10 * time.Millisecond)
   266  
   267  	// Wait for metrics to be gathered several times, and then unblock
   268  	// the transport and check that the metrics were buffered while
   269  	// the transport was blocked.
   270  	timeout := time.After(5 * time.Second)
   271  	const N = 5
   272  	for i := 0; i < N+1; i++ {
   273  		select {
   274  		case <-timeout:
   275  			t.Fatal("timed out waiting for metrics gatherer to be called")
   276  		case <-gathered:
   277  		}
   278  	}
   279  	unblock <- struct{}{}
   280  	tracer.Flush(nil) // wait for buffered metrics to be flushed
   281  
   282  	metrics := recorder.Payloads().Metrics
   283  	if assert.Conditionf(t, func() bool { return len(metrics) >= N }, "len(metrics): %d", len(metrics)) {
   284  		for i, m := range metrics[1:] {
   285  			assert.NotEqual(t, metrics[i].Timestamp, m.Timestamp)
   286  		}
   287  	}
   288  }
   289  
   290  func TestTracerDisableMetrics(t *testing.T) {
   291  	os.Setenv("ELASTIC_APM_DISABLE_METRICS", "golang.heap.*, system.memory.*, system.process.*")
   292  	defer os.Unsetenv("ELASTIC_APM_DISABLE_METRICS")
   293  
   294  	tracer, transport := transporttest.NewRecorderTracer()
   295  	defer tracer.Close()
   296  
   297  	tracer.SendMetrics(nil)
   298  
   299  	payloads := transport.Payloads()
   300  	builtinMetrics := payloads.Metrics[0]
   301  
   302  	expected := []string{"golang.goroutines", "system.cpu.total.norm.pct"}
   303  	var actual []string
   304  	for name := range builtinMetrics.Samples {
   305  		actual = append(actual, name)
   306  	}
   307  	sort.Strings(actual)
   308  	assert.EqualValues(t, expected, actual)
   309  }
   310  
   311  func TestTracerMetricsNotRecording(t *testing.T) {
   312  	tracer := apmtest.NewRecordingTracer()
   313  	defer tracer.Close()
   314  	tracer.SetRecording(false)
   315  	testTracerMetricsNotRecording(t, tracer)
   316  }
   317  
   318  func testTracerMetricsNotRecording(t *testing.T, tracer *apmtest.RecordingTracer) {
   319  	done := make(chan struct{})
   320  	defer close(done)
   321  
   322  	gathered := make(chan struct{})
   323  	tracer.RegisterMetricsGatherer(apm.GatherMetricsFunc(
   324  		func(ctx context.Context, m *apm.Metrics) error {
   325  			select {
   326  			case <-ctx.Done():
   327  				return ctx.Err()
   328  			case <-done:
   329  			case gathered <- struct{}{}:
   330  			}
   331  			return nil
   332  		},
   333  	))
   334  
   335  	tracer.SetMetricsInterval(time.Millisecond)
   336  	defer tracer.SetMetricsInterval(0) // disable at end
   337  
   338  	sent := make(chan struct{})
   339  	go func() {
   340  		defer close(sent)
   341  		tracer.SendMetrics(nil) // unblocked by tracer.Close
   342  	}()
   343  
   344  	// Because the tracer is configured to not record,
   345  	// the metrics gatherer should never be called.
   346  	select {
   347  	case <-time.After(100 * time.Millisecond):
   348  	case <-sent:
   349  		t.Fatal("expected SendMetrics to block")
   350  	case <-gathered:
   351  		t.Fatal("unexpected metrics gatherer call")
   352  	}
   353  
   354  	tracer.Flush(nil) // empty queue, should not block
   355  	payloads := tracer.Payloads()
   356  	require.Empty(t, payloads.Metrics)
   357  }
   358  
   359  // busyWork does meaningless work for the specified duration,
   360  // so we can observe CPU usage.
   361  func busyWork(d time.Duration) int {
   362  	var n int
   363  	afterCh := time.After(d)
   364  	for {
   365  		select {
   366  		case <-afterCh:
   367  			return n
   368  		default:
   369  			n++
   370  		}
   371  	}
   372  }
   373  
   374  type sendStreamFunc func(context.Context, io.Reader) error
   375  
   376  func (f sendStreamFunc) SendStream(ctx context.Context, r io.Reader) error {
   377  	return f(ctx, r)
   378  }