github.com/m3db/m3@v1.5.0/src/query/api/v1/middleware/metrics.go (about) 1 // Copyright (c) 2021 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package middleware 22 23 import ( 24 "net/http" 25 "strconv" 26 "sync" 27 "time" 28 29 "github.com/gorilla/mux" 30 "github.com/uber-go/tally" 31 32 "github.com/m3db/m3/src/cmd/services/m3query/config" 33 "github.com/m3db/m3/src/query/parser/promql" 34 "github.com/m3db/m3/src/x/headers" 35 xhttp "github.com/m3db/m3/src/x/http" 36 "github.com/m3db/m3/src/x/instrument" 37 ) 38 39 const ( 40 metricsTypeTagName = "type" 41 metricsTypeTagDefaultValue = "coordinator" 42 ) 43 44 var histogramTimerOptions = instrument.NewHistogramTimerOptions( 45 instrument.HistogramTimerOptions{ 46 // Use sparse histogram timer buckets to not overload with latency metrics. 47 HistogramBuckets: instrument.SparseHistogramTimerHistogramBuckets(), 48 }) 49 50 // MetricsOptions are the options for the metrics middleware. 51 type MetricsOptions struct { 52 Config config.MetricsMiddlewareConfiguration 53 ParseQueryParams ParseQueryParams 54 ParseOptions promql.ParseOptions 55 } 56 57 // ResponseMetrics records metrics for the http response. 58 func ResponseMetrics(opts Options) mux.MiddlewareFunc { 59 var ( 60 iOpts = opts.InstrumentOpts 61 route = opts.Route 62 cfg = opts.Metrics.Config 63 ) 64 65 custom := newCustomMetrics(iOpts) 66 return func(base http.Handler) http.Handler { 67 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 68 statusCodeTracking := &xhttp.StatusCodeTracker{ResponseWriter: w} 69 w = statusCodeTracking.WrappedResponseWriter() 70 71 start := time.Now() 72 base.ServeHTTP(w, r) 73 d := time.Since(start) 74 75 if !statusCodeTracking.WroteHeader { 76 return 77 } 78 79 path, err := route.GetPathTemplate() 80 if err != nil { 81 path = "unknown" 82 } 83 84 metricsType := r.Header.Get(headers.CustomResponseMetricsType) 85 if len(metricsType) == 0 { 86 metricsType = metricsTypeTagDefaultValue 87 } 88 89 m := custom.getOrCreate(metricsType) 90 classificationMetrics := m.classification 91 metrics := m.route 92 93 var tags classificationTags 94 if cfg.LabelEndpointsClassification.Enabled() || cfg.QueryEndpointsClassification.Enabled() { 95 if statusCodeTracking.Status == 200 { 96 tags = classifyRequest(w, r, classificationMetrics, opts, start, path) 97 } else { 98 // NB(nate): Don't attempt to classify failed requests since they won't have a number of 99 // series/metadata fetched and would skew the results of the smallest bucket if attempted, 100 // as a missing "result" is considered a 0. 101 tags = newClassificationTags() 102 } 103 } 104 105 addLatencyStatus := false 106 if cfg.AddStatusToLatencies { 107 addLatencyStatus = true 108 } 109 110 counter, timer := metrics.metric(path, statusCodeTracking.Status, addLatencyStatus, tags) 111 counter.Inc(1) 112 timer.Record(d) 113 }) 114 } 115 } 116 117 type responseMetrics struct { 118 route *routeMetrics 119 classification *classificationMetrics 120 } 121 122 type customMetrics struct { 123 sync.Mutex 124 metrics map[string]responseMetrics 125 instrumentOpts instrument.Options 126 } 127 128 func newCustomMetrics(instrumentOpts instrument.Options) *customMetrics { 129 return &customMetrics{ 130 metrics: make(map[string]responseMetrics), 131 instrumentOpts: instrumentOpts, 132 } 133 } 134 135 func (c *customMetrics) getOrCreate(value string) *responseMetrics { 136 c.Lock() 137 defer c.Unlock() 138 139 if m, ok := c.metrics[value]; ok { 140 return &m 141 } 142 143 subscope := c.instrumentOpts.MetricsScope().Tagged(map[string]string{ 144 metricsTypeTagName: value, 145 }) 146 m := responseMetrics{ 147 route: newRouteMetrics(subscope), 148 classification: newClassificationMetrics(subscope), 149 } 150 151 c.metrics[value] = m 152 return &m 153 } 154 155 type routeMetrics struct { 156 sync.RWMutex 157 scope tally.Scope 158 metrics map[routeMetricKey]routeMetric 159 timers map[routeMetricKey]tally.Timer 160 } 161 162 type routeMetricKey struct { 163 path string 164 status int 165 resultsClassification string 166 durationClassification string 167 } 168 169 func newRouteMetricKey( 170 path string, 171 status int, 172 tags classificationTags, 173 ) routeMetricKey { 174 return routeMetricKey{ 175 path: path, 176 status: status, 177 resultsClassification: tags[resultsClassification], 178 durationClassification: tags[durationClassification], 179 } 180 } 181 182 type routeMetric struct { 183 status tally.Counter 184 } 185 186 func newRouteMetrics(scope tally.Scope) *routeMetrics { 187 return &routeMetrics{ 188 scope: scope, 189 metrics: make(map[routeMetricKey]routeMetric), 190 timers: make(map[routeMetricKey]tally.Timer), 191 } 192 } 193 194 func (m *routeMetrics) metric( 195 path string, 196 status int, 197 addLatencyStatus bool, 198 tags classificationTags, 199 ) (tally.Counter, tally.Timer) { 200 metricKey := newRouteMetricKey(path, status, tags) 201 // NB: use 0 as the status for all latency operations unless status should be 202 // explicitly included in written metrics. 203 latencyStatus := 0 204 if addLatencyStatus { 205 latencyStatus = status 206 } 207 208 timerKey := newRouteMetricKey(path, latencyStatus, tags) 209 m.RLock() 210 metric, ok1 := m.metrics[metricKey] 211 timer, ok2 := m.timers[timerKey] 212 m.RUnlock() 213 if ok1 && ok2 { 214 return metric.status, timer 215 } 216 217 m.Lock() 218 defer m.Unlock() 219 220 metric, ok1 = m.metrics[metricKey] 221 timer, ok2 = m.timers[timerKey] 222 if ok1 && ok2 { 223 return metric.status, timer 224 } 225 226 allTags := make(map[string]string) 227 for k, v := range tags { 228 allTags[k] = v 229 } 230 allTags["path"] = path 231 232 scopePath := m.scope.Tagged(allTags) 233 scopePathAndStatus := scopePath.Tagged(map[string]string{ 234 "status": strconv.Itoa(status), 235 }) 236 237 if !ok1 { 238 metric = routeMetric{ 239 status: scopePathAndStatus.Counter("request"), 240 } 241 m.metrics[metricKey] = metric 242 } 243 if !ok2 { 244 scope := scopePath 245 if addLatencyStatus { 246 scope = scopePathAndStatus 247 } 248 249 timer = instrument.NewTimer(scope, "latency", histogramTimerOptions) 250 m.timers[timerKey] = timer 251 } 252 253 return metric.status, timer 254 }