github.com/palantir/witchcraft-go-server/v2@v2.76.0/integration/metrics_test.go (about)

     1  // Copyright (c) 2018 Palantir Technologies. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package integration
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  	"runtime"
    25  	"strings"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/palantir/pkg/httpserver"
    30  	"github.com/palantir/pkg/metrics"
    31  	"github.com/palantir/witchcraft-go-logging/conjure/witchcraft/api/logging"
    32  	"github.com/palantir/witchcraft-go-logging/wlog/auditlog/audit2log"
    33  	"github.com/palantir/witchcraft-go-server/v2/config"
    34  	"github.com/palantir/witchcraft-go-server/v2/witchcraft"
    35  	"github.com/stretchr/testify/assert"
    36  	"github.com/stretchr/testify/require"
    37  )
    38  
    39  // TestEmitMetrics verifies that metrics are printed periodically by a Witchcraft server and that the emitted values
    40  // respect the default blacklist. We verify both custom metrics set in the InitFunc (with tags) and server.response
    41  // metrics from the metrics middleware.
    42  func TestEmitMetrics(t *testing.T) {
    43  	logOutputBuffer := &bytes.Buffer{}
    44  	port, err := httpserver.AvailablePort()
    45  	require.NoError(t, err)
    46  
    47  	// ensure that registry used in this test is unique/does not have any past metrics registered on it
    48  	metrics.DefaultMetricsRegistry = metrics.NewRootMetricsRegistry()
    49  	server, serverErr, cleanup := createAndRunCustomTestServer(t, port, port, func(ctx context.Context, info witchcraft.InitInfo) (deferFn func(), rErr error) {
    50  		ctx = metrics.AddTags(ctx, metrics.MustNewTag("key", "val"))
    51  		metrics.FromContext(ctx).Counter("my-counter").Inc(13)
    52  		return nil, info.Router.Post("/error", http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
    53  			rw.WriteHeader(500)
    54  		}))
    55  	}, logOutputBuffer, func(t *testing.T, initFn witchcraft.InitFunc, installCfg config.Install, logOutputBuffer io.Writer) *witchcraft.Server {
    56  		installCfg.MetricsEmitFrequency = 100 * time.Millisecond
    57  		return createTestServer(t, initFn, installCfg, logOutputBuffer)
    58  	})
    59  	defer func() {
    60  		require.NoError(t, server.Close())
    61  	}()
    62  	defer cleanup()
    63  
    64  	// Make POST that will 404 to trigger request size and error rate metrics
    65  	_, err = testServerClient().Post(fmt.Sprintf("https://localhost:%d/%s/%s", port, basePath, "error"), "application/json", strings.NewReader("{}"))
    66  	require.NoError(t, err)
    67  
    68  	// Allow the metric emitter to do its thing.
    69  	time.Sleep(150 * time.Millisecond)
    70  
    71  	parts := strings.Split(logOutputBuffer.String(), "\n")
    72  	var metricLogs []logging.MetricLogV1
    73  	for _, curr := range parts {
    74  		if strings.Contains(curr, `"metric.1"`) {
    75  			var currLog logging.MetricLogV1
    76  			require.NoError(t, json.Unmarshal([]byte(curr), &currLog))
    77  			metricLogs = append(metricLogs, currLog)
    78  		}
    79  	}
    80  
    81  	var (
    82  		seenLoggingSLS,
    83  		seenLoggingSLSLength,
    84  		seenMyCounter,
    85  		seenResponseTimer,
    86  		seenUptime,
    87  		seenResponseSize,
    88  		seenRequestSize,
    89  		seenResponseError bool
    90  	)
    91  	for _, metricLog := range metricLogs {
    92  		switch metricLog.MetricName {
    93  		case "logging.sls":
    94  			seenLoggingSLS = true
    95  			assert.Equal(t, "meter", metricLog.MetricType, "logging.sls metric had incorrect type")
    96  			assert.NotNil(t, metricLog.Values["count"])
    97  			assert.NotNil(t, metricLog.Tags["type"])
    98  
    99  			metricTagLevel := metricLog.Tags["level"]
   100  			if metricLog.Tags["type"] == "service.1" {
   101  				assert.NotEqual(t, "", metricTagLevel)
   102  			} else {
   103  				assert.Equal(t, "", metricTagLevel)
   104  			}
   105  		case "logging.sls.length":
   106  			seenLoggingSLSLength = true
   107  			assert.Equal(t, "histogram", metricLog.MetricType, "logging.sls.length metric had incorrect type")
   108  			assert.NotNil(t, metricLog.Values["max"])
   109  			assert.NotNil(t, metricLog.Values["p95"])
   110  			assert.NotNil(t, metricLog.Values["p99"])
   111  			assert.NotNil(t, metricLog.Values["count"])
   112  			assert.NotNil(t, metricLog.Tags["type"])
   113  		case "my-counter":
   114  			seenMyCounter = true
   115  			assert.Equal(t, "counter", metricLog.MetricType, "my-counter metric had incorrect type")
   116  			assert.Equal(t, map[string]interface{}{"count": json.Number("13")}, metricLog.Values)
   117  			assert.Equal(t, map[string]string{"key": "val"}, metricLog.Tags)
   118  		case "server.response":
   119  			seenResponseTimer = true
   120  			assert.Equal(t, "timer", metricLog.MetricType, "server.response metric had incorrect type")
   121  			assert.NotNil(t, metricLog.Values["count"])
   122  			assert.NotNil(t, metricLog.Values["max"])
   123  			assert.NotNil(t, metricLog.Values["p95"])
   124  			assert.NotNil(t, metricLog.Values["p99"])
   125  
   126  			// keys are part of the default blacklist and should thus be nil
   127  			assert.Nil(t, metricLog.Values["1m"])
   128  			assert.Nil(t, metricLog.Values["5m"])
   129  			assert.Nil(t, metricLog.Values["15m"])
   130  			assert.Nil(t, metricLog.Values["meanRate"])
   131  			assert.Nil(t, metricLog.Values["min"])
   132  			assert.Nil(t, metricLog.Values["mean"])
   133  			assert.Nil(t, metricLog.Values["stddev"])
   134  			assert.Nil(t, metricLog.Values["p50"])
   135  		case "server.request.size":
   136  			seenRequestSize = true
   137  			assert.Equal(t, "histogram", metricLog.MetricType, "server.response metric had incorrect type")
   138  			assert.NotNil(t, metricLog.Values["max"])
   139  			assert.NotNil(t, metricLog.Values["p95"])
   140  			assert.NotNil(t, metricLog.Values["p99"])
   141  			assert.NotNil(t, metricLog.Values["count"])
   142  
   143  			// keys are part of the default blacklist and should thus be nil
   144  			assert.Nil(t, metricLog.Values["min"])
   145  			assert.Nil(t, metricLog.Values["mean"])
   146  			assert.Nil(t, metricLog.Values["stddev"])
   147  			assert.Nil(t, metricLog.Values["p50"])
   148  		case "server.response.size":
   149  			seenResponseSize = true
   150  			assert.Equal(t, "histogram", metricLog.MetricType, "server.response metric had incorrect type")
   151  			assert.NotNil(t, metricLog.Values["max"])
   152  			assert.NotNil(t, metricLog.Values["p95"])
   153  			assert.NotNil(t, metricLog.Values["p99"])
   154  			assert.NotNil(t, metricLog.Values["count"])
   155  
   156  			// keys are part of the default blacklist and should thus be nil
   157  			assert.Nil(t, metricLog.Values["min"])
   158  			assert.Nil(t, metricLog.Values["mean"])
   159  			assert.Nil(t, metricLog.Values["stddev"])
   160  			assert.Nil(t, metricLog.Values["p50"])
   161  		case "server.response.error":
   162  			seenResponseError = true
   163  			assert.Equal(t, "meter", metricLog.MetricType, "server.response metric had incorrect type")
   164  			assert.NotNil(t, metricLog.Values["count"])
   165  
   166  			// keys are part of the default blacklist and should thus be nil
   167  			assert.Nil(t, metricLog.Values["1m"])
   168  			assert.Nil(t, metricLog.Values["5m"])
   169  			assert.Nil(t, metricLog.Values["15m"])
   170  			assert.Nil(t, metricLog.Values["mean"])
   171  		case "server.uptime":
   172  			seenUptime = true
   173  			assert.Equal(t, "gauge", metricLog.MetricType, "server.uptime metric had incorrect type")
   174  			assert.Equal(t, map[string]string{
   175  				"go_os":      runtime.GOOS,
   176  				"go_arch":    runtime.GOARCH,
   177  				"go_version": runtime.Version(),
   178  			}, metricLog.Tags)
   179  			assert.NotZero(t, metricLog.Values["value"])
   180  		default:
   181  			assert.Fail(t, "unexpected metric encountered", "%s", metricLog.MetricName)
   182  		}
   183  	}
   184  
   185  	assert.True(t, seenLoggingSLS, "logging.sls metric was not emitted")
   186  	assert.True(t, seenLoggingSLSLength, "logging.sls.length metric was not emitted")
   187  	assert.True(t, seenMyCounter, "my-counter metric was not emitted")
   188  	assert.True(t, seenResponseTimer, "server.response metric was not emitted")
   189  	assert.True(t, seenRequestSize, "server.request.size metric was not emitted")
   190  	assert.True(t, seenResponseSize, "server.response.size metric was not emitted")
   191  	assert.True(t, seenResponseError, "server.response.error metric was not emitted")
   192  	assert.True(t, seenUptime, "server.uptime metric was not emitted")
   193  
   194  	select {
   195  	case err := <-serverErr:
   196  		require.NoError(t, err)
   197  	default:
   198  	}
   199  }
   200  
   201  // TestEmitMetricsZeroValue verifies that for meter, timer and histogram metrics, a zero value metric log entry is
   202  // emitted before a regular entry is emitted.
   203  func TestEmitMetricsZeroValue(t *testing.T) {
   204  	logOutputBuffer := &bytes.Buffer{}
   205  	port, err := httpserver.AvailablePort()
   206  	require.NoError(t, err)
   207  
   208  	// ensure that registry used in this test is unique/does not have any past metrics registered on it
   209  	metrics.DefaultMetricsRegistry = metrics.NewRootMetricsRegistry()
   210  	server, serverErr, cleanup := createAndRunCustomTestServer(t, port, port, func(ctx context.Context, info witchcraft.InitInfo) (deferFn func(), rErr error) {
   211  		ctx = metrics.AddTags(ctx, metrics.MustNewTag("key", "val"))
   212  		metrics.FromContext(ctx).Counter("my-counter").Inc(13)
   213  		return nil, info.Router.Post("/error", http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
   214  			rw.WriteHeader(500)
   215  		}))
   216  	}, logOutputBuffer, func(t *testing.T, initFn witchcraft.InitFunc, installCfg config.Install, logOutputBuffer io.Writer) *witchcraft.Server {
   217  		installCfg.MetricsEmitFrequency = 100 * time.Millisecond
   218  		return createTestServer(t, initFn, installCfg, logOutputBuffer)
   219  	})
   220  	defer func() {
   221  		require.NoError(t, server.Close())
   222  	}()
   223  	defer cleanup()
   224  
   225  	// Allow the metric emitter to do its thing.
   226  	time.Sleep(150 * time.Millisecond)
   227  
   228  	parts := strings.Split(logOutputBuffer.String(), "\n")
   229  	var metricLogs []logging.MetricLogV1
   230  	for _, curr := range parts {
   231  		if strings.Contains(curr, `"metric.1"`) {
   232  			var currLog logging.MetricLogV1
   233  			require.NoError(t, json.Unmarshal([]byte(curr), &currLog))
   234  			metricLogs = append(metricLogs, currLog)
   235  		}
   236  	}
   237  
   238  	var (
   239  		seenLoggingSLSMeterZero,
   240  		seenLoggingSLSMeter,
   241  		seenLoggingSLSLengthHistogramZero,
   242  		seenLoggingSLSLengthHistogram,
   243  		seenResponseTimerZero,
   244  		seenResponseTimer bool
   245  	)
   246  	for _, metricLog := range metricLogs {
   247  		switch metricLog.MetricName {
   248  		case "logging.sls":
   249  			assert.Equal(t, "meter", metricLog.MetricType, "logging.sls metric had incorrect type")
   250  			assert.NotNil(t, metricLog.Values["count"])
   251  			assert.NotNil(t, metricLog.Tags["type"])
   252  
   253  			if metricLog.Values["count"] == json.Number("0") {
   254  				seenLoggingSLSMeterZero = true
   255  			} else {
   256  				seenLoggingSLSMeter = true
   257  				if !seenLoggingSLSMeterZero {
   258  					assert.Fail(t, "encountered non-zero logging.sls meter value before the zero value")
   259  				}
   260  			}
   261  			metricTagLevel := metricLog.Tags["level"]
   262  			if metricLog.Tags["type"] == "service.1" {
   263  				assert.NotEqual(t, "", metricTagLevel)
   264  			} else {
   265  				assert.Equal(t, "", metricTagLevel)
   266  			}
   267  		case "logging.sls.length":
   268  			assert.Equal(t, "histogram", metricLog.MetricType, "logging.sls.length metric had incorrect type")
   269  			assert.NotNil(t, metricLog.Values["max"])
   270  			assert.NotNil(t, metricLog.Values["p95"])
   271  			assert.NotNil(t, metricLog.Values["p99"])
   272  			assert.NotNil(t, metricLog.Values["count"])
   273  			assert.NotNil(t, metricLog.Tags["type"])
   274  
   275  			if metricLog.Values["count"] == json.Number("0") {
   276  				seenLoggingSLSLengthHistogramZero = true
   277  			} else {
   278  				seenLoggingSLSLengthHistogram = true
   279  				if !seenLoggingSLSLengthHistogramZero {
   280  					assert.Fail(t, "encountered non-zero logging.sls meter value before the zero value")
   281  				}
   282  			}
   283  		case "server.response":
   284  			assert.Equal(t, "timer", metricLog.MetricType, "server.response metric had incorrect type")
   285  			assert.NotNil(t, metricLog.Values["count"])
   286  			assert.NotNil(t, metricLog.Values["max"])
   287  			assert.NotNil(t, metricLog.Values["p95"])
   288  			assert.NotNil(t, metricLog.Values["p99"])
   289  
   290  			// keys are part of the default blacklist and should thus be nil
   291  			assert.Nil(t, metricLog.Values["1m"])
   292  			assert.Nil(t, metricLog.Values["5m"])
   293  			assert.Nil(t, metricLog.Values["15m"])
   294  			assert.Nil(t, metricLog.Values["meanRate"])
   295  			assert.Nil(t, metricLog.Values["min"])
   296  			assert.Nil(t, metricLog.Values["mean"])
   297  			assert.Nil(t, metricLog.Values["stddev"])
   298  			assert.Nil(t, metricLog.Values["p50"])
   299  
   300  			if metricLog.Values["count"] == json.Number("0") {
   301  				seenResponseTimerZero = true
   302  			} else {
   303  				seenResponseTimer = true
   304  				if !seenResponseTimerZero {
   305  					assert.Fail(t, "encountered non-zero server.response timer value before the zero value")
   306  				}
   307  			}
   308  		default:
   309  			// do nothing (okay if extra metrics are emitted)
   310  		}
   311  	}
   312  
   313  	assert.True(t, seenLoggingSLSMeterZero, "logging.sls metric zero value was not emitted")
   314  	assert.True(t, seenLoggingSLSMeter, "logging.sls metric was not emitted")
   315  	assert.True(t, seenLoggingSLSLengthHistogramZero, "logging.sls.length metric zero value was not emitted")
   316  	assert.True(t, seenLoggingSLSLengthHistogram, "logging.sls.length metric was not emitted")
   317  	assert.True(t, seenResponseTimerZero, "server.response metric zero value was not emitted")
   318  	assert.True(t, seenResponseTimer, "server.response metric was not emitted")
   319  
   320  	select {
   321  	case err := <-serverErr:
   322  		require.NoError(t, err)
   323  	default:
   324  	}
   325  }
   326  
   327  // TestMetricWriter verifies that logs backed by MetricWriters produces exactly one sls.logging.length metric per log line.
   328  // While initializing the testServer, we don't expect any other Audit logs to occur, so we log one line
   329  // and ensure we only see one sls.logging.length metric for each.
   330  func TestMetricWriter(t *testing.T) {
   331  	logOutputBuffer := &bytes.Buffer{}
   332  	port, err := httpserver.AvailablePort()
   333  	require.NoError(t, err)
   334  
   335  	superLongLogLine := "super long line"
   336  	for i := 0; i < 15; i++ {
   337  		superLongLogLine += " " + superLongLogLine
   338  	}
   339  
   340  	// ensure that registry used in this test is unique/does not have any past metrics registered on it
   341  	metrics.DefaultMetricsRegistry = metrics.NewRootMetricsRegistry()
   342  	server, serverErr, cleanup := createAndRunCustomTestServer(t, port, port, func(ctx context.Context, info witchcraft.InitInfo) (deferFn func(), rErr error) {
   343  		// These log lines will happen after the MetricWriters are initialized, so we should expect to see one sls.logging.length per line
   344  		audit2log.FromContext(ctx).Audit(superLongLogLine, audit2log.AuditResultSuccess)
   345  
   346  		return func() {}, nil
   347  	}, logOutputBuffer, func(t *testing.T, initFn witchcraft.InitFunc, installCfg config.Install, logOutputBuffer io.Writer) *witchcraft.Server {
   348  		installCfg.MetricsEmitFrequency = 100 * time.Millisecond
   349  		return createTestServer(t, initFn, installCfg, logOutputBuffer)
   350  	})
   351  	defer func() {
   352  		require.NoError(t, server.Close())
   353  	}()
   354  	defer cleanup()
   355  
   356  	// Allow the metric emitter to do its thing.
   357  	time.Sleep(150 * time.Millisecond)
   358  
   359  	parts := strings.Split(logOutputBuffer.String(), "\n")
   360  	var metricLogs []logging.MetricLogV1
   361  	for _, curr := range parts {
   362  		if strings.Contains(curr, `"metric.1"`) {
   363  			var currLog logging.MetricLogV1
   364  			require.NoError(t, json.Unmarshal([]byte(curr), &currLog))
   365  			metricLogs = append(metricLogs, currLog)
   366  		}
   367  	}
   368  
   369  	for _, metricLog := range metricLogs {
   370  		switch metricLog.MetricName {
   371  		case "logging.sls.length":
   372  			assert.Equal(t, "histogram", metricLog.MetricType, "logging.sls.length metric had incorrect type")
   373  			assert.NotNil(t, metricLog.Values["max"])
   374  			assert.NotNil(t, metricLog.Values["count"])
   375  			assert.NotNil(t, metricLog.Tags["type"])
   376  			if metricLog.Tags["type"] == "audit" {
   377  				// skip log entry that emits "0", as it is the auto-generated zero value entry
   378  				if metricLog.Values["count"] == json.Number("0") {
   379  					continue
   380  				}
   381  				require.Equal(t, json.Number("1"), metricLog.Values["count"])
   382  
   383  				maxJSON, ok := metricLog.Values["max"].(json.Number)
   384  				require.True(t, ok)
   385  				max, err := maxJSON.Int64()
   386  				require.NoError(t, err)
   387  				require.Greater(t, max, int64(len(superLongLogLine)))
   388  			}
   389  		default:
   390  		}
   391  	}
   392  
   393  	select {
   394  	case err := <-serverErr:
   395  		require.NoError(t, err)
   396  	default:
   397  	}
   398  }
   399  
   400  // TestEmitMetricsEmptyBlacklist verifies that metrics are printed periodically by a Witchcraft server and that, if the
   401  // blacklist is empty, all values are emitted. We verify both custom metrics set in the InitFunc (with tags) and
   402  // server.response metrics from the metrics middleware.
   403  func TestEmitMetricsEmptyBlacklist(t *testing.T) {
   404  	logOutputBuffer := &bytes.Buffer{}
   405  	port, err := httpserver.AvailablePort()
   406  	require.NoError(t, err)
   407  
   408  	// ensure that registry used in this test is unique/does not have any past metrics registered on it
   409  	metrics.DefaultMetricsRegistry = metrics.NewRootMetricsRegistry()
   410  	server, serverErr, cleanup := createAndRunCustomTestServer(t, port, port, func(ctx context.Context, info witchcraft.InitInfo) (deferFn func(), rErr error) {
   411  		ctx = metrics.AddTags(ctx, metrics.MustNewTag("key", "val"))
   412  		metrics.FromContext(ctx).Counter("my-counter").Inc(13)
   413  		return nil, info.Router.Post("/error", http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
   414  			rw.WriteHeader(500)
   415  		}))
   416  	}, logOutputBuffer, func(t *testing.T, initFn witchcraft.InitFunc, installCfg config.Install, logOutputBuffer io.Writer) *witchcraft.Server {
   417  		installCfg.MetricsEmitFrequency = 100 * time.Millisecond
   418  		return createTestServer(t, initFn, installCfg, logOutputBuffer).WithMetricTypeValuesBlacklist(map[string]map[string]struct{}{})
   419  	})
   420  	defer func() {
   421  		require.NoError(t, server.Close())
   422  	}()
   423  	defer cleanup()
   424  
   425  	// Make POST that will 404 to trigger request size and error rate metrics
   426  	_, err = testServerClient().Post(fmt.Sprintf("https://localhost:%d/%s/%s", port, basePath, "error"), "application/json", strings.NewReader("{}"))
   427  	require.NoError(t, err)
   428  
   429  	// Allow the metric emitter to do its thing.
   430  	time.Sleep(150 * time.Millisecond)
   431  
   432  	parts := strings.Split(logOutputBuffer.String(), "\n")
   433  	var metricLogs []logging.MetricLogV1
   434  	for _, curr := range parts {
   435  		if strings.Contains(curr, `"metric.1"`) {
   436  			var currLog logging.MetricLogV1
   437  			require.NoError(t, json.Unmarshal([]byte(curr), &currLog))
   438  			metricLogs = append(metricLogs, currLog)
   439  		}
   440  	}
   441  
   442  	var (
   443  		seenLoggingSLS,
   444  		seenLoggingSLSLength,
   445  		seenMyCounter,
   446  		seenResponseTimer,
   447  		seenResponseSize,
   448  		seenRequestSize,
   449  		seenResponseError,
   450  		seenUptime bool
   451  	)
   452  	for _, metricLog := range metricLogs {
   453  		switch metricLog.MetricName {
   454  		case "logging.sls":
   455  			seenLoggingSLS = true
   456  			assert.Equal(t, "meter", metricLog.MetricType, "logging.sls metric had incorrect type")
   457  			assert.NotNil(t, metricLog.Values["count"])
   458  			assert.NotNil(t, metricLog.Tags["type"])
   459  
   460  			metricTagLevel := metricLog.Tags["level"]
   461  			if metricLog.Tags["type"] == "service.1" {
   462  				assert.NotEqual(t, "", metricTagLevel)
   463  			} else {
   464  				assert.Equal(t, "", metricTagLevel)
   465  			}
   466  		case "logging.sls.length":
   467  			seenLoggingSLSLength = true
   468  			assert.Equal(t, "histogram", metricLog.MetricType, "logging.sls.length metric had incorrect type")
   469  			assert.NotNil(t, metricLog.Values["max"])
   470  			assert.NotNil(t, metricLog.Values["p95"])
   471  			assert.NotNil(t, metricLog.Values["p99"])
   472  			assert.NotNil(t, metricLog.Values["count"])
   473  			assert.NotNil(t, metricLog.Tags["type"])
   474  		case "my-counter":
   475  			seenMyCounter = true
   476  			assert.Equal(t, "counter", metricLog.MetricType, "my-counter metric had incorrect type")
   477  			assert.Equal(t, map[string]interface{}{"count": json.Number("13")}, metricLog.Values)
   478  			assert.Equal(t, map[string]string{"key": "val"}, metricLog.Tags)
   479  		case "server.response":
   480  			seenResponseTimer = true
   481  			assert.Equal(t, "timer", metricLog.MetricType, "server.response metric had incorrect type")
   482  
   483  			// blacklist is set to empty, so all keys should be non-nil
   484  			assert.NotNil(t, metricLog.Values["count"])
   485  			assert.NotNil(t, metricLog.Values["1m"])
   486  			assert.NotNil(t, metricLog.Values["5m"])
   487  			assert.NotNil(t, metricLog.Values["15m"])
   488  			assert.NotNil(t, metricLog.Values["meanRate"])
   489  			assert.NotNil(t, metricLog.Values["min"])
   490  			assert.NotNil(t, metricLog.Values["max"])
   491  			assert.NotNil(t, metricLog.Values["mean"])
   492  			assert.NotNil(t, metricLog.Values["stddev"])
   493  			assert.NotNil(t, metricLog.Values["p50"])
   494  			assert.NotNil(t, metricLog.Values["p95"])
   495  			assert.NotNil(t, metricLog.Values["p99"])
   496  		case "server.request.size":
   497  			seenRequestSize = true
   498  			assert.Equal(t, "histogram", metricLog.MetricType, "server.response metric had incorrect type")
   499  
   500  			// blacklist is set to empty, so all keys should be non-nil
   501  			assert.NotNil(t, metricLog.Values["min"])
   502  			assert.NotNil(t, metricLog.Values["max"])
   503  			assert.NotNil(t, metricLog.Values["mean"])
   504  			assert.NotNil(t, metricLog.Values["stddev"])
   505  			assert.NotNil(t, metricLog.Values["p50"])
   506  			assert.NotNil(t, metricLog.Values["p95"])
   507  			assert.NotNil(t, metricLog.Values["p99"])
   508  			assert.NotNil(t, metricLog.Values["count"])
   509  		case "server.response.size":
   510  			seenResponseSize = true
   511  			assert.Equal(t, "histogram", metricLog.MetricType, "server.response metric had incorrect type")
   512  
   513  			// blacklist is set to empty, so all keys should be non-nil
   514  			assert.NotNil(t, metricLog.Values["min"])
   515  			assert.NotNil(t, metricLog.Values["max"])
   516  			assert.NotNil(t, metricLog.Values["mean"])
   517  			assert.NotNil(t, metricLog.Values["stddev"])
   518  			assert.NotNil(t, metricLog.Values["p50"])
   519  			assert.NotNil(t, metricLog.Values["p95"])
   520  			assert.NotNil(t, metricLog.Values["p99"])
   521  			assert.NotNil(t, metricLog.Values["count"])
   522  		case "server.response.error":
   523  			seenResponseError = true
   524  			assert.Equal(t, "meter", metricLog.MetricType, "server.response metric had incorrect type")
   525  
   526  			// blacklist is set to empty, so all keys should be non-nil
   527  			assert.NotNil(t, metricLog.Values["count"])
   528  			assert.NotNil(t, metricLog.Values["1m"])
   529  			assert.NotNil(t, metricLog.Values["5m"])
   530  			assert.NotNil(t, metricLog.Values["15m"])
   531  			assert.NotNil(t, metricLog.Values["mean"])
   532  		case "server.uptime":
   533  			seenUptime = true
   534  			assert.Equal(t, "gauge", metricLog.MetricType, "server.uptime metric had incorrect type")
   535  			assert.NotZero(t, metricLog.Values["value"])
   536  		default:
   537  			assert.Fail(t, "unexpected metric encountered", "%s", metricLog.MetricName)
   538  		}
   539  	}
   540  	assert.True(t, seenLoggingSLS, "logging.sls metric was not emitted")
   541  	assert.True(t, seenLoggingSLSLength, "logging.sls.length metric was not emitted")
   542  	assert.True(t, seenMyCounter, "my-counter metric was not emitted")
   543  	assert.True(t, seenResponseTimer, "server.response metric was not emitted")
   544  	assert.True(t, seenRequestSize, "server.request.size metric was not emitted")
   545  	assert.True(t, seenResponseSize, "server.response.size metric was not emitted")
   546  	assert.True(t, seenResponseError, "server.response.error metric was not emitted")
   547  	assert.True(t, seenUptime, "server.uptime metric was not emitted")
   548  
   549  	select {
   550  	case err := <-serverErr:
   551  		require.NoError(t, err)
   552  	default:
   553  	}
   554  }
   555  
   556  // TestMetricTypeValueBlacklist tests that if a metric type value is blacklisted, all metric of that type does not
   557  // contain any of the blacklisted keys.
   558  func TestMetricTypeValueBlacklist(t *testing.T) {
   559  	logOutputBuffer := &bytes.Buffer{}
   560  	port, err := httpserver.AvailablePort()
   561  	require.NoError(t, err)
   562  
   563  	// ensure that registry used in this test is unique/does not have any past metrics registered on it
   564  	metrics.DefaultMetricsRegistry = metrics.NewRootMetricsRegistry()
   565  	server, serverErr, cleanup := createAndRunCustomTestServer(t, port, port, func(ctx context.Context, info witchcraft.InitInfo) (deferFn func(), rErr error) {
   566  		ctx = metrics.AddTags(ctx, metrics.MustNewTag("key", "val"))
   567  		metrics.FromContext(ctx).Counter("my-counter").Inc(13)
   568  		return nil, info.Router.Post("/error", http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
   569  			rw.WriteHeader(500)
   570  		}))
   571  	}, logOutputBuffer, func(t *testing.T, initFn witchcraft.InitFunc, installCfg config.Install, logOutputBuffer io.Writer) *witchcraft.Server {
   572  		installCfg.MetricsEmitFrequency = 100 * time.Millisecond
   573  		return createTestServer(t, initFn, installCfg, logOutputBuffer).WithMetricTypeValuesBlacklist(map[string]map[string]struct{}{
   574  			"histogram": {"count": {}},
   575  		})
   576  	})
   577  	defer func() {
   578  		require.NoError(t, server.Close())
   579  	}()
   580  	defer cleanup()
   581  
   582  	// Make POST that will 404 to trigger request size and error rate metrics
   583  	_, err = testServerClient().Post(fmt.Sprintf("https://localhost:%d/%s/%s", port, basePath, "error"), "application/json", strings.NewReader("{}"))
   584  	require.NoError(t, err)
   585  
   586  	// Allow the metric emitter to do its thing.
   587  	time.Sleep(150 * time.Millisecond)
   588  
   589  	parts := strings.Split(logOutputBuffer.String(), "\n")
   590  	var metricLogs []logging.MetricLogV1
   591  	for _, curr := range parts {
   592  		if strings.Contains(curr, `"metric.1"`) {
   593  			var currLog logging.MetricLogV1
   594  			require.NoError(t, json.Unmarshal([]byte(curr), &currLog))
   595  			metricLogs = append(metricLogs, currLog)
   596  		}
   597  	}
   598  
   599  	var (
   600  		seenLoggingSLS,
   601  		seenLoggingSLSLength,
   602  		seenMyCounter,
   603  		seenResponseTimer,
   604  		seenResponseSize,
   605  		seenRequestSize,
   606  		seenResponseError,
   607  		seenUptime bool
   608  	)
   609  	for _, metricLog := range metricLogs {
   610  		switch metricLog.MetricName {
   611  		case "logging.sls":
   612  			seenLoggingSLS = true
   613  			assert.Equal(t, "meter", metricLog.MetricType, "logging.sls metric had incorrect type")
   614  			assert.NotNil(t, metricLog.Values["count"])
   615  			assert.NotNil(t, metricLog.Tags["type"])
   616  
   617  			metricTagLevel := metricLog.Tags["level"]
   618  			if metricLog.Tags["type"] == "service.1" {
   619  				assert.NotEqual(t, "", metricTagLevel)
   620  			} else {
   621  				assert.Equal(t, "", metricTagLevel)
   622  			}
   623  		case "logging.sls.length":
   624  			seenLoggingSLSLength = true
   625  			assert.Equal(t, "histogram", metricLog.MetricType, "logging.sls.length metric had incorrect type")
   626  			assert.NotNil(t, metricLog.Values["max"])
   627  			assert.NotNil(t, metricLog.Values["p95"])
   628  			assert.NotNil(t, metricLog.Values["p99"])
   629  			assert.Nil(t, metricLog.Values["count"])
   630  			assert.NotNil(t, metricLog.Tags["type"])
   631  		case "my-counter":
   632  			seenMyCounter = true
   633  			assert.Equal(t, "counter", metricLog.MetricType, "my-counter metric had incorrect type")
   634  			assert.Equal(t, map[string]interface{}{"count": json.Number("13")}, metricLog.Values)
   635  			assert.Equal(t, map[string]string{"key": "val"}, metricLog.Tags)
   636  		case "server.response":
   637  			seenResponseTimer = true
   638  			assert.Equal(t, "timer", metricLog.MetricType, "server.response metric had incorrect type")
   639  			assert.NotNil(t, metricLog.Values["count"])
   640  			assert.NotNil(t, metricLog.Values["mean"])
   641  			assert.NotNil(t, metricLog.Values["max"])
   642  			assert.NotNil(t, metricLog.Values["min"])
   643  		case "server.request.size":
   644  			seenRequestSize = true
   645  			assert.Equal(t, "histogram", metricLog.MetricType, "server.response metric had incorrect type")
   646  			// there should be no value for "count" because it is blacklisted for the histogram type
   647  			assert.Nil(t, metricLog.Values["count"])
   648  		case "server.response.size":
   649  			seenResponseSize = true
   650  			assert.Equal(t, "histogram", metricLog.MetricType, "server.response metric had incorrect type")
   651  			// there should be no value for "count" because it is blacklisted for the histogram type
   652  			assert.Nil(t, metricLog.Values["count"])
   653  		case "server.response.error":
   654  			seenResponseError = true
   655  			assert.Equal(t, "meter", metricLog.MetricType, "server.response metric had incorrect type")
   656  			assert.NotNil(t, metricLog.Values["count"])
   657  		case "server.uptime":
   658  			seenUptime = true
   659  			assert.Equal(t, "gauge", metricLog.MetricType, "server.uptime metric had incorrect type")
   660  			assert.NotZero(t, metricLog.Values["value"])
   661  		default:
   662  			assert.Fail(t, "unexpected metric encountered: %s", metricLog.MetricName)
   663  		}
   664  	}
   665  	assert.True(t, seenLoggingSLS, "logging.sls metric was not emitted")
   666  	assert.True(t, seenLoggingSLSLength, "logging.sls.length metric was not emitted")
   667  	assert.True(t, seenMyCounter, "my-counter metric was not emitted")
   668  	assert.True(t, seenResponseTimer, "server.response metric was not emitted")
   669  	assert.True(t, seenRequestSize, "server.request.size metric was not emitted")
   670  	assert.True(t, seenResponseSize, "server.response.size metric was not emitted")
   671  	assert.True(t, seenResponseError, "server.response.error metric was not emitted")
   672  	assert.True(t, seenUptime, "server.uptime metric was not emitted")
   673  
   674  	select {
   675  	case err := <-serverErr:
   676  		require.NoError(t, err)
   677  	default:
   678  	}
   679  }
   680  
   681  // TestMetricsBlacklist verifies that blacklisted metrics are not emitted.
   682  func TestMetricsBlacklist(t *testing.T) {
   683  	logOutputBuffer := &bytes.Buffer{}
   684  	port, err := httpserver.AvailablePort()
   685  	require.NoError(t, err)
   686  	server, serverErr, cleanup := createAndRunCustomTestServer(t, port, port, func(ctx context.Context, info witchcraft.InitInfo) (deferFn func(), rErr error) {
   687  		ctx = metrics.AddTags(ctx, metrics.MustNewTag("key", "val"))
   688  		metrics.FromContext(ctx).Counter("my-counter").Inc(13)
   689  		return nil, info.Router.Post("/error", http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
   690  			rw.WriteHeader(500)
   691  		}))
   692  	}, logOutputBuffer, func(t *testing.T, initFn witchcraft.InitFunc, installCfg config.Install, logOutputBuffer io.Writer) *witchcraft.Server {
   693  		installCfg.MetricsEmitFrequency = 100 * time.Millisecond
   694  		server := createTestServer(t, initFn, installCfg, logOutputBuffer)
   695  		server.WithMetricsBlacklist(map[string]struct{}{
   696  			"my-counter":          {},
   697  			"logging.sls":         {},
   698  			"logging.sls.length":  {},
   699  			"server.request.size": {},
   700  			"server.uptime":       {},
   701  		})
   702  		return server
   703  	})
   704  	defer func() {
   705  		require.NoError(t, server.Close())
   706  	}()
   707  	defer cleanup()
   708  
   709  	// Make POST that will 404 to trigger request size and error rate metrics
   710  	_, err = testServerClient().Post(fmt.Sprintf("https://localhost:%d/%s/%s", port, basePath, "error"), "application/json", strings.NewReader("{}"))
   711  	require.NoError(t, err)
   712  
   713  	// Allow the metric emitter to do its thing.
   714  	time.Sleep(150 * time.Millisecond)
   715  
   716  	parts := strings.Split(logOutputBuffer.String(), "\n")
   717  	var metricLogs []logging.MetricLogV1
   718  	for _, curr := range parts {
   719  		if strings.Contains(curr, `"metric.1"`) {
   720  			var currLog logging.MetricLogV1
   721  			require.NoError(t, json.Unmarshal([]byte(curr), &currLog))
   722  			metricLogs = append(metricLogs, currLog)
   723  		}
   724  	}
   725  
   726  	var (
   727  		seenLoggingSLS,
   728  		seenLoggingSLSLength,
   729  		seenMyCounter,
   730  		seenResponseTimer,
   731  		seenResponseSize,
   732  		seenRequestSize,
   733  		seenResponseError bool
   734  	)
   735  	for _, metricLog := range metricLogs {
   736  		switch metricLog.MetricName {
   737  		case "logging.sls":
   738  			assert.Fail(t, "logging.sls metric should not be emitted")
   739  		case "logging.sls.length":
   740  			assert.Fail(t, "logging.sls.length metric should not be emitted")
   741  		case "my-counter":
   742  			assert.Fail(t, "my-counter metric should not be emitted")
   743  		case "server.response":
   744  			seenResponseTimer = true
   745  			assert.Equal(t, "timer", metricLog.MetricType, "server.response metric had incorrect type")
   746  			assert.NotZero(t, metricLog.Values["count"])
   747  		case "server.request.size":
   748  			assert.Fail(t, "server.request.size metric should not be emitted")
   749  		case "server.response.size":
   750  			seenResponseSize = true
   751  			assert.Equal(t, "histogram", metricLog.MetricType, "server.response metric had incorrect type")
   752  			assert.NotZero(t, metricLog.Values["count"])
   753  		case "server.response.error":
   754  			seenResponseError = true
   755  			assert.Equal(t, "meter", metricLog.MetricType, "server.response metric had incorrect type")
   756  			assert.NotZero(t, metricLog.Values["count"])
   757  		default:
   758  			assert.Fail(t, "unexpected metric encountered: %s", metricLog.MetricName)
   759  		}
   760  	}
   761  	assert.False(t, seenMyCounter, "my-counter metric was emitted")
   762  	assert.False(t, seenRequestSize, "server.request.size metric was emitted")
   763  	assert.False(t, seenLoggingSLS, "logging.sls metric was emitted")
   764  	assert.False(t, seenLoggingSLSLength, "logging.sls.length metric was emitted")
   765  
   766  	assert.True(t, seenResponseTimer, "server.response metric was not emitted")
   767  	assert.True(t, seenResponseSize, "server.response.size metric was not emitted")
   768  	assert.True(t, seenResponseError, "server.response.error metric was not emitted")
   769  
   770  	select {
   771  	case err := <-serverErr:
   772  		require.NoError(t, err)
   773  	default:
   774  	}
   775  }