github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/frontend/transport/handler.go (about) 1 package transport 2 3 import ( 4 "bytes" 5 "context" 6 "flag" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "net/http" 11 "net/url" 12 "strconv" 13 "strings" 14 "time" 15 16 "github.com/go-kit/log" 17 "github.com/go-kit/log/level" 18 "github.com/prometheus/client_golang/prometheus" 19 "github.com/prometheus/client_golang/prometheus/promauto" 20 "github.com/weaveworks/common/httpgrpc" 21 "github.com/weaveworks/common/httpgrpc/server" 22 23 querier_stats "github.com/cortexproject/cortex/pkg/querier/stats" 24 "github.com/cortexproject/cortex/pkg/tenant" 25 "github.com/cortexproject/cortex/pkg/util" 26 util_log "github.com/cortexproject/cortex/pkg/util/log" 27 ) 28 29 const ( 30 // StatusClientClosedRequest is the status code for when a client request cancellation of an http request 31 StatusClientClosedRequest = 499 32 ServiceTimingHeaderName = "Server-Timing" 33 ) 34 35 var ( 36 errCanceled = httpgrpc.Errorf(StatusClientClosedRequest, context.Canceled.Error()) 37 errDeadlineExceeded = httpgrpc.Errorf(http.StatusGatewayTimeout, context.DeadlineExceeded.Error()) 38 errRequestEntityTooLarge = httpgrpc.Errorf(http.StatusRequestEntityTooLarge, "http: request body too large") 39 ) 40 41 // Config for a Handler. 42 type HandlerConfig struct { 43 LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"` 44 MaxBodySize int64 `yaml:"max_body_size"` 45 QueryStatsEnabled bool `yaml:"query_stats_enabled"` 46 } 47 48 func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) { 49 f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.") 50 f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.") 51 f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.") 52 } 53 54 // Handler accepts queries and forwards them to RoundTripper. It can log slow queries, 55 // but all other logic is inside the RoundTripper. 56 type Handler struct { 57 cfg HandlerConfig 58 log log.Logger 59 roundTripper http.RoundTripper 60 61 // Metrics. 62 querySeconds *prometheus.CounterVec 63 querySeries *prometheus.CounterVec 64 queryBytes *prometheus.CounterVec 65 activeUsers *util.ActiveUsersCleanupService 66 } 67 68 // NewHandler creates a new frontend handler. 69 func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logger, reg prometheus.Registerer) http.Handler { 70 h := &Handler{ 71 cfg: cfg, 72 log: log, 73 roundTripper: roundTripper, 74 } 75 76 if cfg.QueryStatsEnabled { 77 h.querySeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 78 Name: "cortex_query_seconds_total", 79 Help: "Total amount of wall clock time spend processing queries.", 80 }, []string{"user"}) 81 82 h.querySeries = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 83 Name: "cortex_query_fetched_series_total", 84 Help: "Number of series fetched to execute a query.", 85 }, []string{"user"}) 86 87 h.queryBytes = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 88 Name: "cortex_query_fetched_chunks_bytes_total", 89 Help: "Size of all chunks fetched to execute a query in bytes.", 90 }, []string{"user"}) 91 92 h.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(func(user string) { 93 h.querySeconds.DeleteLabelValues(user) 94 h.querySeries.DeleteLabelValues(user) 95 h.queryBytes.DeleteLabelValues(user) 96 }) 97 // If cleaner stops or fail, we will simply not clean the metrics for inactive users. 98 _ = h.activeUsers.StartAsync(context.Background()) 99 } 100 101 return h 102 } 103 104 func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { 105 var ( 106 stats *querier_stats.Stats 107 queryString url.Values 108 ) 109 110 // Initialise the stats in the context and make sure it's propagated 111 // down the request chain. 112 if f.cfg.QueryStatsEnabled { 113 var ctx context.Context 114 stats, ctx = querier_stats.ContextWithEmptyStats(r.Context()) 115 r = r.WithContext(ctx) 116 } 117 118 defer func() { 119 _ = r.Body.Close() 120 }() 121 122 // Buffer the body for later use to track slow queries. 123 var buf bytes.Buffer 124 r.Body = http.MaxBytesReader(w, r.Body, f.cfg.MaxBodySize) 125 r.Body = ioutil.NopCloser(io.TeeReader(r.Body, &buf)) 126 127 startTime := time.Now() 128 resp, err := f.roundTripper.RoundTrip(r) 129 queryResponseTime := time.Since(startTime) 130 131 if err != nil { 132 writeError(w, err) 133 return 134 } 135 136 hs := w.Header() 137 for h, vs := range resp.Header { 138 hs[h] = vs 139 } 140 141 if f.cfg.QueryStatsEnabled { 142 writeServiceTimingHeader(queryResponseTime, hs, stats) 143 } 144 145 w.WriteHeader(resp.StatusCode) 146 // we don't check for copy error as there is no much we can do at this point 147 _, _ = io.Copy(w, resp.Body) 148 149 // Check whether we should parse the query string. 150 shouldReportSlowQuery := f.cfg.LogQueriesLongerThan > 0 && queryResponseTime > f.cfg.LogQueriesLongerThan 151 if shouldReportSlowQuery || f.cfg.QueryStatsEnabled { 152 queryString = f.parseRequestQueryString(r, buf) 153 } 154 155 if shouldReportSlowQuery { 156 f.reportSlowQuery(r, queryString, queryResponseTime) 157 } 158 if f.cfg.QueryStatsEnabled { 159 f.reportQueryStats(r, queryString, queryResponseTime, stats) 160 } 161 } 162 163 // reportSlowQuery reports slow queries. 164 func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, queryResponseTime time.Duration) { 165 logMessage := append([]interface{}{ 166 "msg", "slow query detected", 167 "method", r.Method, 168 "host", r.Host, 169 "path", r.URL.Path, 170 "time_taken", queryResponseTime.String(), 171 }, formatQueryString(queryString)...) 172 173 level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) 174 } 175 176 func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, queryResponseTime time.Duration, stats *querier_stats.Stats) { 177 tenantIDs, err := tenant.TenantIDs(r.Context()) 178 if err != nil { 179 return 180 } 181 userID := tenant.JoinTenantIDs(tenantIDs) 182 wallTime := stats.LoadWallTime() 183 numSeries := stats.LoadFetchedSeries() 184 numBytes := stats.LoadFetchedChunkBytes() 185 186 // Track stats. 187 f.querySeconds.WithLabelValues(userID).Add(wallTime.Seconds()) 188 f.querySeries.WithLabelValues(userID).Add(float64(numSeries)) 189 f.queryBytes.WithLabelValues(userID).Add(float64(numBytes)) 190 f.activeUsers.UpdateUserTimestamp(userID, time.Now()) 191 192 // Log stats. 193 logMessage := append([]interface{}{ 194 "msg", "query stats", 195 "component", "query-frontend", 196 "method", r.Method, 197 "path", r.URL.Path, 198 "response_time", queryResponseTime, 199 "query_wall_time_seconds", wallTime.Seconds(), 200 "fetched_series_count", numSeries, 201 "fetched_chunks_bytes", numBytes, 202 }, formatQueryString(queryString)...) 203 204 level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) 205 } 206 207 func (f *Handler) parseRequestQueryString(r *http.Request, bodyBuf bytes.Buffer) url.Values { 208 // Use previously buffered body. 209 r.Body = ioutil.NopCloser(&bodyBuf) 210 211 // Ensure the form has been parsed so all the parameters are present 212 err := r.ParseForm() 213 if err != nil { 214 level.Warn(util_log.WithContext(r.Context(), f.log)).Log("msg", "unable to parse request form", "err", err) 215 return nil 216 } 217 218 return r.Form 219 } 220 221 func formatQueryString(queryString url.Values) (fields []interface{}) { 222 for k, v := range queryString { 223 fields = append(fields, fmt.Sprintf("param_%s", k), strings.Join(v, ",")) 224 } 225 return fields 226 } 227 228 func writeError(w http.ResponseWriter, err error) { 229 switch err { 230 case context.Canceled: 231 err = errCanceled 232 case context.DeadlineExceeded: 233 err = errDeadlineExceeded 234 default: 235 if util.IsRequestBodyTooLarge(err) { 236 err = errRequestEntityTooLarge 237 } 238 } 239 server.WriteError(w, err) 240 } 241 242 func writeServiceTimingHeader(queryResponseTime time.Duration, headers http.Header, stats *querier_stats.Stats) { 243 if stats != nil { 244 parts := make([]string, 0) 245 parts = append(parts, statsValue("querier_wall_time", stats.LoadWallTime())) 246 parts = append(parts, statsValue("response_time", queryResponseTime)) 247 headers.Set(ServiceTimingHeaderName, strings.Join(parts, ", ")) 248 } 249 } 250 251 func statsValue(name string, d time.Duration) string { 252 durationInMs := strconv.FormatFloat(float64(d)/float64(time.Millisecond), 'f', -1, 64) 253 return name + ";dur=" + durationInMs 254 }