github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/api/v1/middleware/metrics.go (about)

     1  // Copyright (c) 2021 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package middleware
    22  
    23  import (
    24  	"net/http"
    25  	"strconv"
    26  	"sync"
    27  	"time"
    28  
    29  	"github.com/m3db/m3/src/cmd/services/m3query/config"
    30  	"github.com/m3db/m3/src/query/parser/promql"
    31  	"github.com/m3db/m3/src/x/headers"
    32  	xhttp "github.com/m3db/m3/src/x/http"
    33  	"github.com/m3db/m3/src/x/instrument"
    34  
    35  	"github.com/gorilla/mux"
    36  	"github.com/uber-go/tally"
    37  )
    38  
const (
	// metricsTypeTagName is the tag key added to the metrics subscope created
	// for each distinct metrics type.
	metricsTypeTagName         = "type"
	// metricsTypeTagDefaultValue is the metrics type used when the request does
	// not carry a custom response metrics type header.
	metricsTypeTagDefaultValue = "coordinator"
)
    43  
// histogramTimerOptions are the timer options shared by every "latency" timer
// created by routeMetrics.metric.
var histogramTimerOptions = instrument.NewHistogramTimerOptions(
	instrument.HistogramTimerOptions{
		// Use sparse histogram timer buckets to not overload with latency metrics.
		HistogramBuckets: instrument.SparseHistogramTimerHistogramBuckets(),
	})
    49  
// MetricsOptions are the options for the metrics middleware.
type MetricsOptions struct {
	// Config is read by ResponseMetrics to decide whether requests are
	// classified and whether the response status is added to latency timers.
	Config           config.MetricsMiddlewareConfiguration
	// ParseQueryParams is not referenced in the code visible here.
	// NOTE(review): presumably consumed by request classification — confirm.
	ParseQueryParams ParseQueryParams
	// ParseOptions is not referenced in the code visible here.
	// NOTE(review): presumably the PromQL parse options used when classifying
	// queries — confirm.
	ParseOptions     promql.ParseOptions
}
    56  
    57  // ResponseMetrics records metrics for the http response.
    58  func ResponseMetrics(opts Options) mux.MiddlewareFunc {
    59  	var (
    60  		iOpts = opts.InstrumentOpts
    61  		route = opts.Route
    62  		cfg   = opts.Metrics.Config
    63  	)
    64  
    65  	custom := newCustomMetrics(iOpts)
    66  	return func(base http.Handler) http.Handler {
    67  		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    68  			statusCodeTracking := &xhttp.StatusCodeTracker{ResponseWriter: w}
    69  			w = statusCodeTracking.WrappedResponseWriter()
    70  
    71  			start := time.Now()
    72  			base.ServeHTTP(w, r)
    73  			d := time.Since(start)
    74  
    75  			if !statusCodeTracking.WroteHeader {
    76  				return
    77  			}
    78  
    79  			path, err := route.GetPathTemplate()
    80  			if err != nil {
    81  				path = "unknown"
    82  			}
    83  
    84  			metricsType := r.Header.Get(headers.CustomResponseMetricsType)
    85  			if len(metricsType) == 0 {
    86  				metricsType = metricsTypeTagDefaultValue
    87  			}
    88  
    89  			m := custom.getOrCreate(metricsType)
    90  			classificationMetrics := m.classification
    91  			metrics := m.route
    92  
    93  			var tags classificationTags
    94  			if cfg.LabelEndpointsClassification.Enabled() || cfg.QueryEndpointsClassification.Enabled() {
    95  				if statusCodeTracking.Status == 200 {
    96  					tags = classifyRequest(w, r, classificationMetrics, opts, start, path)
    97  				} else {
    98  					// NB(nate): Don't attempt to classify failed requests since they won't have a number of
    99  					// series/metadata fetched and would skew the results of the smallest bucket if attempted,
   100  					// as a missing "result" is considered a 0.
   101  					tags = newClassificationTags()
   102  				}
   103  			}
   104  
   105  			addLatencyStatus := false
   106  			if cfg.AddStatusToLatencies {
   107  				addLatencyStatus = true
   108  			}
   109  
   110  			counter, timer := metrics.metric(path, statusCodeTracking.Status, addLatencyStatus, tags)
   111  			counter.Inc(1)
   112  			timer.Record(d)
   113  		})
   114  	}
   115  }
   116  
// responseMetrics groups the metrics kept for a single metrics type.
type responseMetrics struct {
	// route supplies the per-route request counter and latency timer.
	route          *routeMetrics
	// classification is handed to classifyRequest for successful requests.
	classification *classificationMetrics
}
   121  
   122  type customMetrics struct {
   123  	sync.Mutex
   124  	metrics        map[string]responseMetrics
   125  	instrumentOpts instrument.Options
   126  }
   127  
   128  func newCustomMetrics(instrumentOpts instrument.Options) *customMetrics {
   129  	return &customMetrics{
   130  		metrics:        make(map[string]responseMetrics),
   131  		instrumentOpts: instrumentOpts,
   132  	}
   133  }
   134  
   135  func (c *customMetrics) getOrCreate(value string) *responseMetrics {
   136  	c.Lock()
   137  	defer c.Unlock()
   138  
   139  	if m, ok := c.metrics[value]; ok {
   140  		return &m
   141  	}
   142  
   143  	subscope := c.instrumentOpts.MetricsScope().Tagged(map[string]string{
   144  		metricsTypeTagName: value,
   145  	})
   146  	m := responseMetrics{
   147  		route:          newRouteMetrics(subscope),
   148  		classification: newClassificationMetrics(subscope),
   149  	}
   150  
   151  	c.metrics[value] = m
   152  	return &m
   153  }
   154  
// routeMetrics caches the request counters and latency timers created for
// each route. The embedded RWMutex guards both maps.
type routeMetrics struct {
	sync.RWMutex
	// scope is the parent scope from which tagged per-route scopes are derived.
	scope   tally.Scope
	// metrics holds request counters keyed by path, status and classification.
	metrics map[routeMetricKey]routeMetric
	// timers holds latency timers; their key uses status 0 unless the status
	// is explicitly added to latency metrics.
	timers  map[routeMetricKey]tally.Timer
}
   161  
// routeMetricKey is the map key identifying a cached counter or timer.
type routeMetricKey struct {
	// path is the route path template (or "unknown" when it is unavailable).
	path                   string
	// status is the response status code; latency timer keys use 0 when the
	// status is not part of latency metrics.
	status                 int
	// resultsClassification is the value of the results classification tag.
	resultsClassification  string
	// durationClassification is the value of the duration classification tag.
	durationClassification string
}
   168  
   169  func newRouteMetricKey(
   170  	path string,
   171  	status int,
   172  	tags classificationTags,
   173  ) routeMetricKey {
   174  	return routeMetricKey{
   175  		path:                   path,
   176  		status:                 status,
   177  		resultsClassification:  tags[resultsClassification],
   178  		durationClassification: tags[durationClassification],
   179  	}
   180  }
   181  
// routeMetric holds the counter cached for one routeMetricKey.
type routeMetric struct {
	// status is the "request" counter created on the path-and-status scope.
	status tally.Counter
}
   185  
   186  func newRouteMetrics(scope tally.Scope) *routeMetrics {
   187  	return &routeMetrics{
   188  		scope:   scope,
   189  		metrics: make(map[routeMetricKey]routeMetric),
   190  		timers:  make(map[routeMetricKey]tally.Timer),
   191  	}
   192  }
   193  
   194  func (m *routeMetrics) metric(
   195  	path string,
   196  	status int,
   197  	addLatencyStatus bool,
   198  	tags classificationTags,
   199  ) (tally.Counter, tally.Timer) {
   200  	metricKey := newRouteMetricKey(path, status, tags)
   201  	// NB: use 0 as the status for all latency operations unless status should be
   202  	// explicitly included in written metrics.
   203  	latencyStatus := 0
   204  	if addLatencyStatus {
   205  		latencyStatus = status
   206  	}
   207  
   208  	timerKey := newRouteMetricKey(path, latencyStatus, tags)
   209  	m.RLock()
   210  	metric, ok1 := m.metrics[metricKey]
   211  	timer, ok2 := m.timers[timerKey]
   212  	m.RUnlock()
   213  	if ok1 && ok2 {
   214  		return metric.status, timer
   215  	}
   216  
   217  	m.Lock()
   218  	defer m.Unlock()
   219  
   220  	metric, ok1 = m.metrics[metricKey]
   221  	timer, ok2 = m.timers[timerKey]
   222  	if ok1 && ok2 {
   223  		return metric.status, timer
   224  	}
   225  
   226  	allTags := make(map[string]string)
   227  	for k, v := range tags {
   228  		allTags[k] = v
   229  	}
   230  	allTags["path"] = path
   231  
   232  	scopePath := m.scope.Tagged(allTags)
   233  	scopePathAndStatus := scopePath.Tagged(map[string]string{
   234  		"status": strconv.Itoa(status),
   235  	})
   236  
   237  	if !ok1 {
   238  		metric = routeMetric{
   239  			status: scopePathAndStatus.Counter("request"),
   240  		}
   241  		m.metrics[metricKey] = metric
   242  	}
   243  	if !ok2 {
   244  		scope := scopePath
   245  		if addLatencyStatus {
   246  			scope = scopePathAndStatus
   247  		}
   248  
   249  		timer = instrument.NewTimer(scope, "latency", histogramTimerOptions)
   250  		m.timers[timerKey] = timer
   251  	}
   252  
   253  	return metric.status, timer
   254  }