github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/lokifrontend/frontend/transport/handler.go (about)

     1  package transport
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"flag"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"net/http"
    11  	"net/url"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    15  
    16  	"github.com/go-kit/log"
    17  	"github.com/go-kit/log/level"
    18  	"github.com/prometheus/client_golang/prometheus"
    19  	"github.com/prometheus/client_golang/prometheus/promauto"
    20  	"github.com/weaveworks/common/httpgrpc"
    21  	"github.com/weaveworks/common/httpgrpc/server"
    22  
    23  	"github.com/grafana/dskit/tenant"
    24  
    25  	querier_stats "github.com/grafana/loki/pkg/querier/stats"
    26  	"github.com/grafana/loki/pkg/util"
    27  	util_log "github.com/grafana/loki/pkg/util/log"
    28  )
    29  
    30  const (
    31  	// StatusClientClosedRequest is the status code for when a client request cancellation of an http request
    32  	StatusClientClosedRequest = 499
    33  	ServiceTimingHeaderName   = "Server-Timing"
    34  )
    35  
    36  var (
    37  	errCanceled              = httpgrpc.Errorf(StatusClientClosedRequest, context.Canceled.Error())
    38  	errDeadlineExceeded      = httpgrpc.Errorf(http.StatusGatewayTimeout, context.DeadlineExceeded.Error())
    39  	errRequestEntityTooLarge = httpgrpc.Errorf(http.StatusRequestEntityTooLarge, "http: request body too large")
    40  )
    41  
    42  // Config for a Handler.
    43  type HandlerConfig struct {
    44  	LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
    45  	MaxBodySize          int64         `yaml:"max_body_size"`
    46  	QueryStatsEnabled    bool          `yaml:"query_stats_enabled"`
    47  }
    48  
    49  func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) {
    50  	f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.")
    51  	f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.")
    52  	f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.")
    53  }
    54  
    55  // Handler accepts queries and forwards them to RoundTripper. It can log slow queries,
    56  // but all other logic is inside the RoundTripper.
    57  type Handler struct {
    58  	cfg          HandlerConfig
    59  	log          log.Logger
    60  	roundTripper http.RoundTripper
    61  
    62  	// Metrics.
    63  	querySeconds *prometheus.CounterVec
    64  	querySeries  *prometheus.CounterVec
    65  	queryBytes   *prometheus.CounterVec
    66  	activeUsers  *util.ActiveUsersCleanupService
    67  }
    68  
    69  // NewHandler creates a new frontend handler.
    70  func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logger, reg prometheus.Registerer) http.Handler {
    71  	h := &Handler{
    72  		cfg:          cfg,
    73  		log:          log,
    74  		roundTripper: roundTripper,
    75  	}
    76  
    77  	if cfg.QueryStatsEnabled {
    78  		h.querySeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
    79  			Name: "cortex_query_seconds_total",
    80  			Help: "Total amount of wall clock time spend processing queries.",
    81  		}, []string{"user"})
    82  
    83  		h.querySeries = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
    84  			Name: "cortex_query_fetched_series_total",
    85  			Help: "Number of series fetched to execute a query.",
    86  		}, []string{"user"})
    87  
    88  		h.queryBytes = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
    89  			Name: "cortex_query_fetched_chunks_bytes_total",
    90  			Help: "Size of all chunks fetched to execute a query in bytes.",
    91  		}, []string{"user"})
    92  
    93  		h.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(func(user string) {
    94  			h.querySeconds.DeleteLabelValues(user)
    95  			h.querySeries.DeleteLabelValues(user)
    96  			h.queryBytes.DeleteLabelValues(user)
    97  		})
    98  		// If cleaner stops or fail, we will simply not clean the metrics for inactive users.
    99  		_ = h.activeUsers.StartAsync(context.Background())
   100  	}
   101  
   102  	return h
   103  }
   104  
   105  func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
   106  	var (
   107  		stats       *querier_stats.Stats
   108  		queryString url.Values
   109  	)
   110  
   111  	// Initialise the stats in the context and make sure it's propagated
   112  	// down the request chain.
   113  	if f.cfg.QueryStatsEnabled {
   114  		var ctx context.Context
   115  		stats, ctx = querier_stats.ContextWithEmptyStats(r.Context())
   116  		r = r.WithContext(ctx)
   117  	}
   118  
   119  	defer func() {
   120  		_ = r.Body.Close()
   121  	}()
   122  
   123  	// Buffer the body for later use to track slow queries.
   124  	var buf bytes.Buffer
   125  	r.Body = http.MaxBytesReader(w, r.Body, f.cfg.MaxBodySize)
   126  	r.Body = ioutil.NopCloser(io.TeeReader(r.Body, &buf))
   127  
   128  	startTime := time.Now()
   129  	resp, err := f.roundTripper.RoundTrip(r)
   130  	queryResponseTime := time.Since(startTime)
   131  
   132  	if err != nil {
   133  		writeError(w, err)
   134  		return
   135  	}
   136  
   137  	hs := w.Header()
   138  	for h, vs := range resp.Header {
   139  		hs[h] = vs
   140  	}
   141  
   142  	if f.cfg.QueryStatsEnabled {
   143  		writeServiceTimingHeader(queryResponseTime, hs, stats)
   144  	}
   145  
   146  	w.WriteHeader(resp.StatusCode)
   147  	// we don't check for copy error as there is no much we can do at this point
   148  	_, _ = io.Copy(w, resp.Body)
   149  
   150  	// Check whether we should parse the query string.
   151  	shouldReportSlowQuery := f.cfg.LogQueriesLongerThan > 0 && queryResponseTime > f.cfg.LogQueriesLongerThan
   152  	if shouldReportSlowQuery || f.cfg.QueryStatsEnabled {
   153  		queryString = f.parseRequestQueryString(r, buf)
   154  	}
   155  
   156  	if shouldReportSlowQuery {
   157  		f.reportSlowQuery(r, queryString, queryResponseTime)
   158  	}
   159  	if f.cfg.QueryStatsEnabled {
   160  		f.reportQueryStats(r, queryString, queryResponseTime, stats)
   161  	}
   162  }
   163  
   164  // reportSlowQuery reports slow queries.
   165  func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, queryResponseTime time.Duration) {
   166  	logMessage := append([]interface{}{
   167  		"msg", "slow query detected",
   168  		"method", r.Method,
   169  		"host", r.Host,
   170  		"path", r.URL.Path,
   171  		"time_taken", queryResponseTime.String(),
   172  	}, formatQueryString(queryString)...)
   173  
   174  	level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
   175  }
   176  
   177  func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, queryResponseTime time.Duration, stats *querier_stats.Stats) {
   178  	tenantIDs, err := tenant.TenantIDs(r.Context())
   179  	if err != nil {
   180  		return
   181  	}
   182  	userID := tenant.JoinTenantIDs(tenantIDs)
   183  	wallTime := stats.LoadWallTime()
   184  	numSeries := stats.LoadFetchedSeries()
   185  	numBytes := stats.LoadFetchedChunkBytes()
   186  
   187  	// Track stats.
   188  	f.querySeconds.WithLabelValues(userID).Add(wallTime.Seconds())
   189  	f.querySeries.WithLabelValues(userID).Add(float64(numSeries))
   190  	f.queryBytes.WithLabelValues(userID).Add(float64(numBytes))
   191  	f.activeUsers.UpdateUserTimestamp(userID, time.Now())
   192  
   193  	// Log stats.
   194  	logMessage := append([]interface{}{
   195  		"msg", "query stats",
   196  		"component", "query-frontend",
   197  		"method", r.Method,
   198  		"path", r.URL.Path,
   199  		"response_time", queryResponseTime,
   200  		"query_wall_time_seconds", wallTime.Seconds(),
   201  		"fetched_series_count", numSeries,
   202  		"fetched_chunks_bytes", numBytes,
   203  	}, formatQueryString(queryString)...)
   204  
   205  	level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
   206  }
   207  
   208  func (f *Handler) parseRequestQueryString(r *http.Request, bodyBuf bytes.Buffer) url.Values {
   209  	// Use previously buffered body.
   210  	r.Body = ioutil.NopCloser(&bodyBuf)
   211  
   212  	// Ensure the form has been parsed so all the parameters are present
   213  	err := r.ParseForm()
   214  	if err != nil {
   215  		level.Warn(util_log.WithContext(r.Context(), f.log)).Log("msg", "unable to parse request form", "err", err)
   216  		return nil
   217  	}
   218  
   219  	return r.Form
   220  }
   221  
   222  func formatQueryString(queryString url.Values) (fields []interface{}) {
   223  	for k, v := range queryString {
   224  		fields = append(fields, fmt.Sprintf("param_%s", k), strings.Join(v, ","))
   225  	}
   226  	return fields
   227  }
   228  
   229  func writeError(w http.ResponseWriter, err error) {
   230  	switch err {
   231  	case context.Canceled:
   232  		err = errCanceled
   233  	case context.DeadlineExceeded:
   234  		err = errDeadlineExceeded
   235  	default:
   236  		if util.IsRequestBodyTooLarge(err) {
   237  			err = errRequestEntityTooLarge
   238  		}
   239  	}
   240  	server.WriteError(w, err)
   241  }
   242  
   243  func writeServiceTimingHeader(queryResponseTime time.Duration, headers http.Header, stats *querier_stats.Stats) {
   244  	if stats != nil {
   245  		parts := make([]string, 0)
   246  		parts = append(parts, statsValue("querier_wall_time", stats.LoadWallTime()))
   247  		parts = append(parts, statsValue("response_time", queryResponseTime))
   248  		headers.Set(ServiceTimingHeaderName, strings.Join(parts, ", "))
   249  	}
   250  }
   251  
   252  func statsValue(name string, d time.Duration) string {
   253  	durationInMs := strconv.FormatFloat(float64(d)/float64(time.Millisecond), 'f', -1, 64)
   254  	return name + ";dur=" + durationInMs
   255  }