github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/frontend/transport/handler.go (about)

     1  package transport
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"flag"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"net/http"
    11  	"net/url"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    15  
    16  	"github.com/go-kit/log"
    17  	"github.com/go-kit/log/level"
    18  	"github.com/prometheus/client_golang/prometheus"
    19  	"github.com/prometheus/client_golang/prometheus/promauto"
    20  	"github.com/weaveworks/common/httpgrpc"
    21  	"github.com/weaveworks/common/httpgrpc/server"
    22  
    23  	querier_stats "github.com/cortexproject/cortex/pkg/querier/stats"
    24  	"github.com/cortexproject/cortex/pkg/tenant"
    25  	"github.com/cortexproject/cortex/pkg/util"
    26  	util_log "github.com/cortexproject/cortex/pkg/util/log"
    27  )
    28  
    29  const (
    30  	// StatusClientClosedRequest is the status code for when a client request cancellation of an http request
    31  	StatusClientClosedRequest = 499
    32  	ServiceTimingHeaderName   = "Server-Timing"
    33  )
    34  
    35  var (
    36  	errCanceled              = httpgrpc.Errorf(StatusClientClosedRequest, context.Canceled.Error())
    37  	errDeadlineExceeded      = httpgrpc.Errorf(http.StatusGatewayTimeout, context.DeadlineExceeded.Error())
    38  	errRequestEntityTooLarge = httpgrpc.Errorf(http.StatusRequestEntityTooLarge, "http: request body too large")
    39  )
    40  
    41  // Config for a Handler.
    42  type HandlerConfig struct {
    43  	LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
    44  	MaxBodySize          int64         `yaml:"max_body_size"`
    45  	QueryStatsEnabled    bool          `yaml:"query_stats_enabled"`
    46  }
    47  
    48  func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) {
    49  	f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.")
    50  	f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.")
    51  	f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.")
    52  }
    53  
    54  // Handler accepts queries and forwards them to RoundTripper. It can log slow queries,
    55  // but all other logic is inside the RoundTripper.
    56  type Handler struct {
    57  	cfg          HandlerConfig
    58  	log          log.Logger
    59  	roundTripper http.RoundTripper
    60  
    61  	// Metrics.
    62  	querySeconds *prometheus.CounterVec
    63  	querySeries  *prometheus.CounterVec
    64  	queryBytes   *prometheus.CounterVec
    65  	activeUsers  *util.ActiveUsersCleanupService
    66  }
    67  
    68  // NewHandler creates a new frontend handler.
    69  func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logger, reg prometheus.Registerer) http.Handler {
    70  	h := &Handler{
    71  		cfg:          cfg,
    72  		log:          log,
    73  		roundTripper: roundTripper,
    74  	}
    75  
    76  	if cfg.QueryStatsEnabled {
    77  		h.querySeconds = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
    78  			Name: "cortex_query_seconds_total",
    79  			Help: "Total amount of wall clock time spend processing queries.",
    80  		}, []string{"user"})
    81  
    82  		h.querySeries = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
    83  			Name: "cortex_query_fetched_series_total",
    84  			Help: "Number of series fetched to execute a query.",
    85  		}, []string{"user"})
    86  
    87  		h.queryBytes = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
    88  			Name: "cortex_query_fetched_chunks_bytes_total",
    89  			Help: "Size of all chunks fetched to execute a query in bytes.",
    90  		}, []string{"user"})
    91  
    92  		h.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(func(user string) {
    93  			h.querySeconds.DeleteLabelValues(user)
    94  			h.querySeries.DeleteLabelValues(user)
    95  			h.queryBytes.DeleteLabelValues(user)
    96  		})
    97  		// If cleaner stops or fail, we will simply not clean the metrics for inactive users.
    98  		_ = h.activeUsers.StartAsync(context.Background())
    99  	}
   100  
   101  	return h
   102  }
   103  
   104  func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
   105  	var (
   106  		stats       *querier_stats.Stats
   107  		queryString url.Values
   108  	)
   109  
   110  	// Initialise the stats in the context and make sure it's propagated
   111  	// down the request chain.
   112  	if f.cfg.QueryStatsEnabled {
   113  		var ctx context.Context
   114  		stats, ctx = querier_stats.ContextWithEmptyStats(r.Context())
   115  		r = r.WithContext(ctx)
   116  	}
   117  
   118  	defer func() {
   119  		_ = r.Body.Close()
   120  	}()
   121  
   122  	// Buffer the body for later use to track slow queries.
   123  	var buf bytes.Buffer
   124  	r.Body = http.MaxBytesReader(w, r.Body, f.cfg.MaxBodySize)
   125  	r.Body = ioutil.NopCloser(io.TeeReader(r.Body, &buf))
   126  
   127  	startTime := time.Now()
   128  	resp, err := f.roundTripper.RoundTrip(r)
   129  	queryResponseTime := time.Since(startTime)
   130  
   131  	if err != nil {
   132  		writeError(w, err)
   133  		return
   134  	}
   135  
   136  	hs := w.Header()
   137  	for h, vs := range resp.Header {
   138  		hs[h] = vs
   139  	}
   140  
   141  	if f.cfg.QueryStatsEnabled {
   142  		writeServiceTimingHeader(queryResponseTime, hs, stats)
   143  	}
   144  
   145  	w.WriteHeader(resp.StatusCode)
   146  	// we don't check for copy error as there is no much we can do at this point
   147  	_, _ = io.Copy(w, resp.Body)
   148  
   149  	// Check whether we should parse the query string.
   150  	shouldReportSlowQuery := f.cfg.LogQueriesLongerThan > 0 && queryResponseTime > f.cfg.LogQueriesLongerThan
   151  	if shouldReportSlowQuery || f.cfg.QueryStatsEnabled {
   152  		queryString = f.parseRequestQueryString(r, buf)
   153  	}
   154  
   155  	if shouldReportSlowQuery {
   156  		f.reportSlowQuery(r, queryString, queryResponseTime)
   157  	}
   158  	if f.cfg.QueryStatsEnabled {
   159  		f.reportQueryStats(r, queryString, queryResponseTime, stats)
   160  	}
   161  }
   162  
   163  // reportSlowQuery reports slow queries.
   164  func (f *Handler) reportSlowQuery(r *http.Request, queryString url.Values, queryResponseTime time.Duration) {
   165  	logMessage := append([]interface{}{
   166  		"msg", "slow query detected",
   167  		"method", r.Method,
   168  		"host", r.Host,
   169  		"path", r.URL.Path,
   170  		"time_taken", queryResponseTime.String(),
   171  	}, formatQueryString(queryString)...)
   172  
   173  	level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
   174  }
   175  
   176  func (f *Handler) reportQueryStats(r *http.Request, queryString url.Values, queryResponseTime time.Duration, stats *querier_stats.Stats) {
   177  	tenantIDs, err := tenant.TenantIDs(r.Context())
   178  	if err != nil {
   179  		return
   180  	}
   181  	userID := tenant.JoinTenantIDs(tenantIDs)
   182  	wallTime := stats.LoadWallTime()
   183  	numSeries := stats.LoadFetchedSeries()
   184  	numBytes := stats.LoadFetchedChunkBytes()
   185  
   186  	// Track stats.
   187  	f.querySeconds.WithLabelValues(userID).Add(wallTime.Seconds())
   188  	f.querySeries.WithLabelValues(userID).Add(float64(numSeries))
   189  	f.queryBytes.WithLabelValues(userID).Add(float64(numBytes))
   190  	f.activeUsers.UpdateUserTimestamp(userID, time.Now())
   191  
   192  	// Log stats.
   193  	logMessage := append([]interface{}{
   194  		"msg", "query stats",
   195  		"component", "query-frontend",
   196  		"method", r.Method,
   197  		"path", r.URL.Path,
   198  		"response_time", queryResponseTime,
   199  		"query_wall_time_seconds", wallTime.Seconds(),
   200  		"fetched_series_count", numSeries,
   201  		"fetched_chunks_bytes", numBytes,
   202  	}, formatQueryString(queryString)...)
   203  
   204  	level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
   205  }
   206  
   207  func (f *Handler) parseRequestQueryString(r *http.Request, bodyBuf bytes.Buffer) url.Values {
   208  	// Use previously buffered body.
   209  	r.Body = ioutil.NopCloser(&bodyBuf)
   210  
   211  	// Ensure the form has been parsed so all the parameters are present
   212  	err := r.ParseForm()
   213  	if err != nil {
   214  		level.Warn(util_log.WithContext(r.Context(), f.log)).Log("msg", "unable to parse request form", "err", err)
   215  		return nil
   216  	}
   217  
   218  	return r.Form
   219  }
   220  
   221  func formatQueryString(queryString url.Values) (fields []interface{}) {
   222  	for k, v := range queryString {
   223  		fields = append(fields, fmt.Sprintf("param_%s", k), strings.Join(v, ","))
   224  	}
   225  	return fields
   226  }
   227  
   228  func writeError(w http.ResponseWriter, err error) {
   229  	switch err {
   230  	case context.Canceled:
   231  		err = errCanceled
   232  	case context.DeadlineExceeded:
   233  		err = errDeadlineExceeded
   234  	default:
   235  		if util.IsRequestBodyTooLarge(err) {
   236  			err = errRequestEntityTooLarge
   237  		}
   238  	}
   239  	server.WriteError(w, err)
   240  }
   241  
   242  func writeServiceTimingHeader(queryResponseTime time.Duration, headers http.Header, stats *querier_stats.Stats) {
   243  	if stats != nil {
   244  		parts := make([]string, 0)
   245  		parts = append(parts, statsValue("querier_wall_time", stats.LoadWallTime()))
   246  		parts = append(parts, statsValue("response_time", queryResponseTime))
   247  		headers.Set(ServiceTimingHeaderName, strings.Join(parts, ", "))
   248  	}
   249  }
   250  
   251  func statsValue(name string, d time.Duration) string {
   252  	durationInMs := strconv.FormatFloat(float64(d)/float64(time.Millisecond), 'f', -1, 64)
   253  	return name + ";dur=" + durationInMs
   254  }