github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/api/v1/middleware/metrics_test.go (about)

     1  // Copyright (c) 2021 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package middleware
    22  
    23  import (
    24  	"net/http"
    25  	"net/http/httptest"
    26  	"strconv"
    27  	"testing"
    28  	"time"
    29  
    30  	"github.com/m3db/m3/src/cmd/services/m3query/config"
    31  	"github.com/m3db/m3/src/query/api/v1/route"
    32  	"github.com/m3db/m3/src/query/parser/promql"
    33  	"github.com/m3db/m3/src/x/headers"
    34  	"github.com/m3db/m3/src/x/instrument"
    35  	"github.com/m3db/m3/src/x/tallytest"
    36  
    37  	"github.com/gorilla/mux"
    38  	"github.com/stretchr/testify/require"
    39  	"github.com/uber-go/tally"
    40  )
    41  
    42  func TestResponseMetrics(t *testing.T) {
    43  	scope := tally.NewTestScope("", nil)
    44  	iOpts := instrument.NewOptions().SetMetricsScope(scope)
    45  
    46  	r := mux.NewRouter()
    47  	route := r.NewRoute()
    48  	opts := Options{
    49  		InstrumentOpts: iOpts,
    50  		Route:          route,
    51  	}
    52  
    53  	h := ResponseMetrics(opts).Middleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    54  		w.WriteHeader(200)
    55  	}))
    56  	route.Path("/test").Handler(h)
    57  
    58  	server := httptest.NewServer(r)
    59  	defer server.Close()
    60  
    61  	resp, err := server.Client().Get(server.URL + "/test?foo=bar") //nolint: noctx
    62  	require.NoError(t, err)
    63  	require.NoError(t, resp.Body.Close())
    64  
    65  	snapshot := scope.Snapshot()
    66  	tallytest.AssertCounterValue(t, 1, snapshot, "request", map[string]string{
    67  		"path":   "/test",
    68  		"status": "200",
    69  		"type":   "coordinator",
    70  	})
    71  
    72  	hist := snapshot.Histograms()
    73  	require.True(t, len(hist) == 1)
    74  	for _, h := range hist {
    75  		require.Equal(t, "latency", h.Name())
    76  		require.Equal(t, map[string]string{
    77  			"path": "/test",
    78  			"type": "coordinator",
    79  		}, h.Tags())
    80  	}
    81  }
    82  
    83  func TestResponseMetricsCustomMetricType(t *testing.T) {
    84  	scope := tally.NewTestScope("", nil)
    85  	iOpts := instrument.NewOptions().SetMetricsScope(scope)
    86  
    87  	r := mux.NewRouter()
    88  	route := r.NewRoute()
    89  	opts := Options{
    90  		InstrumentOpts: iOpts,
    91  		Route:          route,
    92  	}
    93  
    94  	h := ResponseMetrics(opts).Middleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    95  		r.Header.Set(headers.CustomResponseMetricsType, "foo")
    96  		w.WriteHeader(200)
    97  	}))
    98  	route.Path("/test").Handler(h)
    99  
   100  	server := httptest.NewServer(r)
   101  	defer server.Close()
   102  
   103  	resp, err := server.Client().Get(server.URL + "/test?foo=bar") //nolint: noctx
   104  	require.NoError(t, err)
   105  	require.NoError(t, resp.Body.Close())
   106  
   107  	snapshot := scope.Snapshot()
   108  	tallytest.AssertCounterValue(t, 1, snapshot, "request", map[string]string{
   109  		"path":   "/test",
   110  		"status": "200",
   111  		"type":   "foo",
   112  	})
   113  
   114  	hist := snapshot.Histograms()
   115  	require.True(t, len(hist) == 1)
   116  	for _, h := range hist {
   117  		require.Equal(t, "latency", h.Name())
   118  		require.Equal(t, map[string]string{
   119  			"path": "/test",
   120  			"type": "foo",
   121  		}, h.Tags())
   122  	}
   123  }
   124  
   125  var parseQueryParams ParseQueryParams = func(r *http.Request, _ time.Time) (QueryParams, error) {
   126  	if err := r.ParseForm(); err != nil {
   127  		return QueryParams{}, err
   128  	}
   129  	params := QueryParams{
   130  		Query: r.FormValue("query"),
   131  	}
   132  	if s := r.FormValue("start"); s != "" {
   133  		start, err := strconv.Atoi(r.FormValue("start"))
   134  		if err != nil {
   135  			return QueryParams{}, err
   136  		}
   137  		params.Start = time.Unix(int64(start), 0)
   138  	}
   139  
   140  	if s := r.FormValue("end"); s != "" {
   141  		end, err := strconv.Atoi(r.FormValue("end"))
   142  		if err != nil {
   143  			return QueryParams{}, err
   144  		}
   145  		params.End = time.Unix(int64(end), 0)
   146  	}
   147  	return params, nil
   148  }
   149  
   150  func TestLargeResponseMetrics(t *testing.T) {
   151  	scope := tally.NewTestScope("", nil)
   152  	iOpts := instrument.NewOptions().SetMetricsScope(scope)
   153  
   154  	r := mux.NewRouter()
   155  	route := r.NewRoute()
   156  	opts := Options{
   157  		InstrumentOpts: iOpts,
   158  		Route:          route,
   159  		Metrics: MetricsOptions{
   160  			Config: config.MetricsMiddlewareConfiguration{
   161  				QueryEndpointsClassification: config.QueryClassificationConfig{
   162  					ResultsBuckets:  []int{1, 10},
   163  					DurationBuckets: []time.Duration{1 * time.Minute, 15 * time.Minute},
   164  				},
   165  			},
   166  			ParseQueryParams: parseQueryParams,
   167  		},
   168  	}
   169  
   170  	h := ResponseMetrics(opts).Middleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   171  		w.Header().Add(headers.FetchedSeriesCount, "15")
   172  		w.WriteHeader(200)
   173  	}))
   174  	route.Path("/api/v1/query").Handler(h)
   175  
   176  	server := httptest.NewServer(r)
   177  	defer server.Close()
   178  
   179  	resp, err := server.Client().Get(server.URL + "/api/v1/query?query=rate(up[20m])") //nolint: noctx
   180  	require.NoError(t, err)
   181  	require.NoError(t, resp.Body.Close())
   182  
   183  	snapshot := scope.Snapshot()
   184  	tallytest.AssertCounterValue(t, 1, snapshot, "request", map[string]string{
   185  		"path":                 "/api/v1/query",
   186  		"status":               "200",
   187  		"type":                 "coordinator",
   188  		resultsClassification:  "10",
   189  		durationClassification: "15m0s",
   190  	})
   191  
   192  	hist := snapshot.Histograms()
   193  	require.True(t, len(hist) == 1)
   194  	for _, h := range hist {
   195  		require.Equal(t, "latency", h.Name())
   196  		require.Equal(t, map[string]string{
   197  			"path":                 "/api/v1/query",
   198  			"type":                 "coordinator",
   199  			resultsClassification:  "10",
   200  			durationClassification: "15m0s",
   201  		}, h.Tags())
   202  	}
   203  }
   204  
   205  func TestMultipleLargeResponseMetricsWithLatencyStatus(t *testing.T) {
   206  	testMultipleLargeResponseMetrics(t, true)
   207  }
   208  
   209  func TestMultipleLargeResponseMetricsWithoutLatencyStatus(t *testing.T) {
   210  	testMultipleLargeResponseMetrics(t, false)
   211  }
   212  
   213  func TestCustomMetricsRepeatedGets(t *testing.T) {
   214  	scope := tally.NewTestScope("", nil)
   215  	iOpts := instrument.NewOptions().SetMetricsScope(scope)
   216  
   217  	cm := newCustomMetrics(iOpts)
   218  
   219  	_ = cm.getOrCreate("foo")
   220  	_ = cm.getOrCreate("foo")
   221  	require.Equal(t, len(cm.metrics), 1)
   222  
   223  	_ = cm.getOrCreate("foo2")
   224  	require.Equal(t, len(cm.metrics), 2)
   225  }
   226  
   227  func testMultipleLargeResponseMetrics(t *testing.T, addStatus bool) {
   228  	scope := tally.NewTestScope("", nil)
   229  	iOpts := instrument.NewOptions().SetMetricsScope(scope)
   230  
   231  	r := mux.NewRouter()
   232  	route := r.NewRoute()
   233  	opts := Options{
   234  		InstrumentOpts: iOpts,
   235  		Route:          route,
   236  		Metrics: MetricsOptions{
   237  			Config: config.MetricsMiddlewareConfiguration{
   238  				QueryEndpointsClassification: config.QueryClassificationConfig{
   239  					ResultsBuckets:  []int{1, 10},
   240  					DurationBuckets: []time.Duration{1 * time.Minute, 15 * time.Minute},
   241  				},
   242  				AddStatusToLatencies: addStatus,
   243  			},
   244  			ParseQueryParams: parseQueryParams,
   245  		},
   246  	}
   247  
   248  	// NB: pass expected seriesCount from qs to the test.
   249  	seriesCount := "series_count"
   250  	responseCode := "response_code"
   251  	h := ResponseMetrics(opts).Middleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   252  		count := r.URL.Query().Get(seriesCount)
   253  		w.Header().Add(headers.FetchedSeriesCount, count)
   254  		code := r.URL.Query().Get(responseCode)
   255  		c, err := strconv.Atoi(code)
   256  		require.NoError(t, err)
   257  		w.WriteHeader(c)
   258  	}))
   259  
   260  	route.Path("/api/v1/query_range").Handler(h)
   261  	server := httptest.NewServer(r)
   262  	defer server.Close()
   263  
   264  	urls := []string{
   265  		"query=rate(up[20m])&series_count=15&response_code=200&start=1&end=1",
   266  		"query=rate(up[10m])&series_count=15&response_code=200&start=1&end=1",
   267  		"query=rate(up[10m])&series_count=5&response_code=200&start=1&end=1",
   268  		"query=rate(up[20m])&series_count=15&response_code=300&start=1&end=1",
   269  		// NB: this should be large since the end-start + query duration is 15m.
   270  		"query=rate(up[14m])&series_count=15&response_code=200&start=1621458000&end=1621458060",
   271  	}
   272  
   273  	for _, url := range urls {
   274  		resp, err := server.Client().Get(server.URL + "/api/v1/query_range?" + url) //nolint: noctx
   275  		require.NoError(t, err)
   276  		require.NoError(t, resp.Body.Close())
   277  	}
   278  
   279  	snapshot := scope.Snapshot()
   280  	tallytest.AssertCounterValue(t, 2, snapshot, "request", map[string]string{
   281  		"path":                 "/api/v1/query_range",
   282  		"status":               "200",
   283  		"type":                 "coordinator",
   284  		resultsClassification:  "10",
   285  		durationClassification: "15m0s",
   286  	})
   287  
   288  	tallytest.AssertCounterValue(t, 1, snapshot, "request", map[string]string{
   289  		"path":                 "/api/v1/query_range",
   290  		"status":               "300",
   291  		"type":                 "coordinator",
   292  		resultsClassification:  "unclassified",
   293  		durationClassification: "unclassified",
   294  	})
   295  
   296  	tallytest.AssertCounterValue(t, 1, snapshot, "request", map[string]string{
   297  		"path":                 "/api/v1/query_range",
   298  		"status":               "200",
   299  		"type":                 "coordinator",
   300  		resultsClassification:  "1",
   301  		durationClassification: "1m0s",
   302  	})
   303  
   304  	tallytest.AssertCounterValue(t, 4, snapshot, "count", map[string]string{
   305  		"type":   "coordinator",
   306  		"status": "classified_duration",
   307  	})
   308  	tallytest.AssertCounterValue(t, 4, snapshot, "count", map[string]string{
   309  		"type":   "coordinator",
   310  		"status": "classified_result",
   311  	})
   312  
   313  	var (
   314  		hist       = snapshot.Histograms()
   315  		exHistLen  = 4
   316  		exTagCount = 4
   317  	)
   318  
   319  	if addStatus {
   320  		// NB: if status is added, we expect to see three histogram entries,
   321  		// since they will also include the status code in the tags list; otherwise
   322  		// code:200 and code:300 queries are expected to go to the same histogram
   323  		// metric.
   324  		exTagCount++
   325  	}
   326  
   327  	require.Equal(t, exHistLen, len(hist))
   328  	buckets := map[string]map[string]int{
   329  		resultsClassification: {
   330  			"1":  0,
   331  			"10": 0,
   332  		},
   333  		durationClassification: {
   334  			"1m0s":  0,
   335  			"15m0s": 0,
   336  		},
   337  	}
   338  	statuses := map[string]int{}
   339  	for _, h := range hist {
   340  		require.Equal(t, "latency", h.Name())
   341  
   342  		tags := h.Tags()
   343  		require.Equal(t, exTagCount, len(tags))
   344  		require.Equal(t, "/api/v1/query_range", tags["path"])
   345  		require.Equal(t, metricsTypeTagDefaultValue, tags[metricsTypeTagName])
   346  
   347  		buckets[resultsClassification][tags[resultsClassification]]++
   348  		buckets[durationClassification][tags[durationClassification]]++
   349  		if addStatus {
   350  			statuses[tags["status"]]++
   351  		}
   352  	}
   353  
   354  	expectedBuckets := map[string]map[string]int{
   355  		resultsClassification: {
   356  			"1":            1,
   357  			"10":           2,
   358  			"unclassified": 1,
   359  		},
   360  		durationClassification: {
   361  			"1m0s":         2,
   362  			"15m0s":        1,
   363  			"unclassified": 1,
   364  		},
   365  	}
   366  	if addStatus {
   367  		require.Equal(t, map[string]int{"200": 3, "300": 1}, statuses)
   368  	}
   369  
   370  	require.Equal(t, expectedBuckets, buckets)
   371  }
   372  
   373  func TestRequestClassificationByEndpoints(t *testing.T) {
   374  	defaultConfig := config.MetricsMiddlewareConfiguration{
   375  		QueryEndpointsClassification: config.QueryClassificationConfig{
   376  			ResultsBuckets:  []int{1, 10, 100, 1000},
   377  			DurationBuckets: []time.Duration{1 * time.Minute, 10 * time.Minute, 100 * time.Minute},
   378  		},
   379  		LabelEndpointsClassification: config.QueryClassificationConfig{
   380  			ResultsBuckets:  []int{2, 20, 200, 2000},
   381  			DurationBuckets: []time.Duration{2 * time.Minute, 20 * time.Minute, 200 * time.Minute},
   382  		},
   383  	}
   384  	tests := []struct {
   385  		name             string
   386  		path             string
   387  		config           config.MetricsMiddlewareConfiguration
   388  		isQueryEndpoint  bool
   389  		query            string
   390  		fetchedResult    string
   391  		expectedResult   string
   392  		expectedDuration string
   393  	}{
   394  		{
   395  			name:             "query_range",
   396  			path:             route.QueryRangeURL,
   397  			config:           defaultConfig,
   398  			isQueryEndpoint:  true,
   399  			query:            "query=sum(rate(coordinator_http_handler_http_handler_request[1m]))&start=1&end=660",
   400  			fetchedResult:    "25",
   401  			expectedResult:   "10",
   402  			expectedDuration: "10m0s",
   403  		},
   404  		{
   405  			name:             "query_range - no results",
   406  			path:             route.QueryRangeURL,
   407  			config:           defaultConfig,
   408  			isQueryEndpoint:  true,
   409  			query:            "query=sum(rate(coordinator_http_handler_http_handler_request[1m]))&start=1&end=660",
   410  			fetchedResult:    "",
   411  			expectedResult:   "1",
   412  			expectedDuration: "10m0s",
   413  		},
   414  		{
   415  			name:             "query",
   416  			path:             route.QueryURL,
   417  			config:           defaultConfig,
   418  			isQueryEndpoint:  true,
   419  			query:            "query=sum(rate(coordinator_http_handler_http_handler_request[1m]))&time=1630611461",
   420  			fetchedResult:    "1",
   421  			expectedResult:   "1",
   422  			expectedDuration: "1m0s",
   423  		},
   424  		{
   425  			name:             "label_values",
   426  			path:             route.Prefix + "/label/__name__/values",
   427  			config:           defaultConfig,
   428  			isQueryEndpoint:  false,
   429  			query:            "start=0&end=1800",
   430  			fetchedResult:    "30000",
   431  			expectedResult:   "2000",
   432  			expectedDuration: "20m0s",
   433  		},
   434  		{
   435  			name:             "label_values -- max time",
   436  			path:             route.Prefix + "/label/__name__/values",
   437  			config:           defaultConfig,
   438  			isQueryEndpoint:  false,
   439  			fetchedResult:    "30000",
   440  			expectedResult:   "2000",
   441  			expectedDuration: "3h20m0s",
   442  		},
   443  		{
   444  			name:             "label_names",
   445  			path:             route.LabelNamesURL,
   446  			config:           defaultConfig,
   447  			isQueryEndpoint:  false,
   448  			query:            "start=0&end=21600",
   449  			fetchedResult:    "300",
   450  			expectedResult:   "200",
   451  			expectedDuration: "3h20m0s",
   452  		},
   453  		{
   454  			name:             "label_names - no results",
   455  			path:             route.LabelNamesURL,
   456  			config:           defaultConfig,
   457  			isQueryEndpoint:  false,
   458  			query:            "start=0&end=21600",
   459  			fetchedResult:    "",
   460  			expectedResult:   "2",
   461  			expectedDuration: "3h20m0s",
   462  		},
   463  		{
   464  			name:             "non-classifiable endpoint",
   465  			path:             "/foo",
   466  			config:           defaultConfig,
   467  			isQueryEndpoint:  false,
   468  			query:            "start=0&end=21600",
   469  			expectedResult:   "unclassified",
   470  			expectedDuration: "unclassified",
   471  		},
   472  		{
   473  			name:            "disabled",
   474  			path:            "/foo",
   475  			config:          config.MetricsMiddlewareConfiguration{},
   476  			isQueryEndpoint: false,
   477  			query:           "start=0&end=21600",
   478  		},
   479  	}
   480  
   481  	for _, tt := range tests {
   482  		t.Run(tt.name, func(t *testing.T) {
   483  			scope := tally.NewTestScope("", nil)
   484  			iOpts := instrument.NewOptions().SetMetricsScope(scope)
   485  
   486  			r := mux.NewRouter()
   487  			route := r.NewRoute()
   488  			opts := Options{
   489  				InstrumentOpts: iOpts,
   490  				Route:          route,
   491  				Metrics: MetricsOptions{
   492  					Config:           tt.config,
   493  					ParseQueryParams: parseQueryParams,
   494  					ParseOptions: promql.NewParseOptions().
   495  						SetRequireStartEndTime(false).
   496  						SetNowFn(time.Now),
   497  				},
   498  			}
   499  
   500  			h := ResponseMetrics(opts).Middleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   501  				if tt.isQueryEndpoint {
   502  					w.Header().Add(headers.FetchedSeriesCount, tt.fetchedResult)
   503  				} else {
   504  					w.Header().Add(headers.FetchedMetadataCount, tt.fetchedResult)
   505  				}
   506  				w.WriteHeader(200)
   507  			}))
   508  
   509  			route.Path(tt.path).Handler(h)
   510  			server := httptest.NewServer(r)
   511  			defer server.Close()
   512  
   513  			resp, err := server.Client().Get(server.URL + tt.path + "?" + tt.query) //nolint: noctx
   514  			require.NoError(t, err)
   515  			require.NoError(t, resp.Body.Close())
   516  
   517  			snapshot := scope.Snapshot()
   518  			// Validate request counter
   519  			tags := map[string]string{
   520  				"path":                 tt.path,
   521  				"status":               "200",
   522  				"type":                 "coordinator",
   523  				resultsClassification:  tt.expectedResult,
   524  				durationClassification: tt.expectedDuration,
   525  			}
   526  			if tt.expectedResult == "" {
   527  				delete(tags, resultsClassification)
   528  			}
   529  			if tt.expectedDuration == "" {
   530  				delete(tags, durationClassification)
   531  			}
   532  			tallytest.AssertCounterValue(t, 1, snapshot, "request", tags)
   533  
   534  			// Validate latency histogram
   535  			require.Equal(t, 1, len(snapshot.Histograms()))
   536  			var hist tally.HistogramSnapshot
   537  			for _, hist = range snapshot.Histograms() {
   538  			}
   539  
   540  			if !tt.config.AddStatusToLatencies {
   541  				delete(tags, "status")
   542  			}
   543  			require.Equal(t, tags, hist.Tags())
   544  
   545  			count := int64(0)
   546  			for _, value := range hist.Durations() {
   547  				count += value
   548  			}
   549  			require.Equal(t, int64(1), count)
   550  		})
   551  	}
   552  }