github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/lokifrontend/frontend/transport/handler.go (about) 1 package transport 2 3 import ( 4 "bytes" 5 "context" 6 "flag" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "net/http" 11 "net/url" 12 "strconv" 13 "strings" 14 "time" 15 16 "github.com/go-kit/log" 17 "github.com/go-kit/log/level" 18 "github.com/prometheus/client_golang/prometheus" 19 "github.com/prometheus/client_golang/prometheus/promauto" 20 "github.com/weaveworks/common/httpgrpc" 21 "github.com/weaveworks/common/httpgrpc/server" 22 23 "github.com/grafana/dskit/tenant" 24 25 querier_stats "github.com/grafana/loki/pkg/querier/stats" 26 "github.com/grafana/loki/pkg/util" 27 util_log "github.com/grafana/loki/pkg/util/log" 28 ) 29 30 const ( 31 // StatusClientClosedRequest is the status code for when a client request cancellation of an http request 32 StatusClientClosedRequest = 499 33 ServiceTimingHeaderName = "Server-Timing" 34 ) 35 36 var ( 37 errCanceled = httpgrpc.Errorf(StatusClientClosedRequest, context.Canceled.Error()) 38 errDeadlineExceeded = httpgrpc.Errorf(http.StatusGatewayTimeout, context.DeadlineExceeded.Error()) 39 errRequestEntityTooLarge = httpgrpc.Errorf(http.StatusRequestEntityTooLarge, "http: request body too large") 40 ) 41 42 // Config for a Handler. 43 type HandlerConfig struct { 44 LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"` 45 MaxBodySize int64 `yaml:"max_body_size"` 46 QueryStatsEnabled bool `yaml:"query_stats_enabled"` 47 } 48 49 func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) { 50 f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.") 51 f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.") 52 f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.") 53 } 54 55 // Handler accepts queries and forwards them to RoundTripper. It can log slow queries, 56 // but all other logic is inside the RoundTripper. 57 type Handler struct { 58 cfg HandlerConfig 59 log log.Logger 60 roundTripper http.RoundTripper 61 62 // Metrics. 63 querySeconds *prometheus.CounterVec 64 querySeries *prometheus.CounterVec 65 queryBytes *prometheus.CounterVec 66 activeUsers *util.ActiveUsersCleanupService 67 } 68 69 // NewHandler creates a new frontend handler. 70 func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logger, reg prometheus.Registerer) http.Handler { 71 h := &Handler{ 72 cfg: cfg, 73 log: log, 74 roundTripper: roundTripper, 75 } 76 77 if cfg.QueryStatsEnabled { 78 h.querySeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 79 Name: "cortex_query_seconds_total", 80 Help: "Total amount of wall clock time spend processing queries.", 81 }, []string{"user"}) 82 83 h.querySeries = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 84 Name: "cortex_query_fetched_series_total", 85 Help: "Number of series fetched to execute a query.", 86 }, []string{"user"}) 87 88 h.queryBytes = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{ 89 Name: "cortex_query_fetched_chunks_bytes_total", 90 Help: "Size of all chunks fetched to execute a query in bytes.", 91 }, []string{"user"}) 92 93 h.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(func(user string) { 94 h.querySeconds.DeleteLabelValues(user) 95 h.querySeries.DeleteLabelValues(user) 96 h.queryBytes.DeleteLabelValues(user) 97 }) 98 // If cleaner stops or fail, we will simply not clean the metrics for inactive users. 99 _ = h.activeUsers.StartAsync(context.Background()) 100 } 101 102 return h 103 } 104 105 func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { 106 var ( 107 stats *querier_stats.Stats 108 queryString url.Values 109 ) 110 111 // Initialise the stats in the context and make sure it's propagated 112 // down the request chain. 113 if f.cfg.QueryStatsEnabled { 114 var ctx context.Context 115 stats, ctx = querier_stats.ContextWithEmptyStats(r.Context()) 116 r = r.WithContext(ctx) 117 } 118 119 defer func() { 120 _ = r.Body.Close() 121 }() 122 123 // Buffer the body for later use to track slow queries. 124 var buf bytes.Buffer 125 r.Body = http.MaxBytesReader(w, r.Body, f.cfg.MaxBodySize) 126 r.Body = ioutil.NopCloser(io.TeeReader(r.Body, &buf)) 127 128 startTime := time.Now() 129 resp, err := f.roundTripper.RoundTrip(r) 130 queryResponseTime := time.Since(startTime) 131 132 if err != nil { 133 writeError(w, err) 134 return 135 } 136 137 hs := w.Header() 138 for h, vs := range resp.Header { 139 hs[h] = vs 140 } 141 142 if f.cfg.QueryStatsEnabled { 143 writeServiceTimingHeader(queryResponseTime, hs, stats) 144 } 145 146 w.WriteHeader(resp.StatusCode) 147 // we don't check for copy error as there is no much we can do at this point 148 _, _ = io.Copy(w, resp.Body) 149 150 // Check whether we should parse the query string. 151 shouldReportSlowQuery := f.cfg.LogQueriesLongerThan > 0 && queryResponseTime > f.cfg.LogQueriesLongerThan 152 if shouldReportSlowQuery || f.cfg.QueryStatsEnabled { 153 queryString = f.parseRequestQueryString(r, buf) 154 } 155 156 if shouldReportSlowQuery { 157 f.reportSlowQuery(r, queryString, queryResponseTime) 158 } 159 if f.cfg.QueryStatsEnabled { 160 f.reportQueryStats(r, queryString, queryResponseTime, stats) 161 } 162 } 163 164 // reportSlowQuery reports slow queries. 165 func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, queryResponseTime time.Duration) { 166 logMessage := append([]interface{}{ 167 "msg", "slow query detected", 168 "method", r.Method, 169 "host", r.Host, 170 "path", r.URL.Path, 171 "time_taken", queryResponseTime.String(), 172 }, formatQueryString(queryString)...) 173 174 level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) 175 } 176 177 func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, queryResponseTime time.Duration, stats *querier_stats.Stats) { 178 tenantIDs, err := tenant.TenantIDs(r.Context()) 179 if err != nil { 180 return 181 } 182 userID := tenant.JoinTenantIDs(tenantIDs) 183 wallTime := stats.LoadWallTime() 184 numSeries := stats.LoadFetchedSeries() 185 numBytes := stats.LoadFetchedChunkBytes() 186 187 // Track stats. 188 f.querySeconds.WithLabelValues(userID).Add(wallTime.Seconds()) 189 f.querySeries.WithLabelValues(userID).Add(float64(numSeries)) 190 f.queryBytes.WithLabelValues(userID).Add(float64(numBytes)) 191 f.activeUsers.UpdateUserTimestamp(userID, time.Now()) 192 193 // Log stats. 194 logMessage := append([]interface{}{ 195 "msg", "query stats", 196 "component", "query-frontend", 197 "method", r.Method, 198 "path", r.URL.Path, 199 "response_time", queryResponseTime, 200 "query_wall_time_seconds", wallTime.Seconds(), 201 "fetched_series_count", numSeries, 202 "fetched_chunks_bytes", numBytes, 203 }, formatQueryString(queryString)...) 204 205 level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) 206 } 207 208 func (f *Handler) parseRequestQueryString(r *http.Request, bodyBuf bytes.Buffer) url.Values { 209 // Use previously buffered body. 210 r.Body = ioutil.NopCloser(&bodyBuf) 211 212 // Ensure the form has been parsed so all the parameters are present 213 err := r.ParseForm() 214 if err != nil { 215 level.Warn(util_log.WithContext(r.Context(), f.log)).Log("msg", "unable to parse request form", "err", err) 216 return nil 217 } 218 219 return r.Form 220 } 221 222 func formatQueryString(queryString url.Values) (fields []interface{}) { 223 for k, v := range queryString { 224 fields = append(fields, fmt.Sprintf("param_%s", k), strings.Join(v, ",")) 225 } 226 return fields 227 } 228 229 func writeError(w http.ResponseWriter, err error) { 230 switch err { 231 case context.Canceled: 232 err = errCanceled 233 case context.DeadlineExceeded: 234 err = errDeadlineExceeded 235 default: 236 if util.IsRequestBodyTooLarge(err) { 237 err = errRequestEntityTooLarge 238 } 239 } 240 server.WriteError(w, err) 241 } 242 243 func writeServiceTimingHeader(queryResponseTime time.Duration, headers http.Header, stats *querier_stats.Stats) { 244 if stats != nil { 245 parts := make([]string, 0) 246 parts = append(parts, statsValue("querier_wall_time", stats.LoadWallTime())) 247 parts = append(parts, statsValue("response_time", queryResponseTime)) 248 headers.Set(ServiceTimingHeaderName, strings.Join(parts, ", ")) 249 } 250 } 251 252 func statsValue(name string, d time.Duration) string { 253 durationInMs := strconv.FormatFloat(float64(d)/float64(time.Millisecond), 'f', -1, 64) 254 return name + ";dur=" + durationInMs 255 }