github.com/thanos-io/thanos@v0.32.5/cmd/thanos/query_frontend.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package main
     5  
     6  import (
     7  	"net"
     8  	"net/http"
     9  	"time"
    10  
    11  	extflag "github.com/efficientgo/tools/extkingpin"
    12  	"github.com/go-kit/log"
    13  	"github.com/go-kit/log/level"
    14  	"github.com/klauspost/compress/gzhttp"
    15  	"github.com/oklog/run"
    16  	"github.com/opentracing/opentracing-go"
    17  	"github.com/pkg/errors"
    18  	"github.com/prometheus/client_golang/prometheus"
    19  	"github.com/weaveworks/common/user"
    20  	"gopkg.in/yaml.v2"
    21  
    22  	cortexfrontend "github.com/thanos-io/thanos/internal/cortex/frontend"
    23  	"github.com/thanos-io/thanos/internal/cortex/frontend/transport"
    24  	"github.com/thanos-io/thanos/internal/cortex/querier/queryrange"
    25  	cortexvalidation "github.com/thanos-io/thanos/internal/cortex/util/validation"
    26  	"github.com/thanos-io/thanos/pkg/api"
    27  	"github.com/thanos-io/thanos/pkg/component"
    28  	"github.com/thanos-io/thanos/pkg/extkingpin"
    29  	"github.com/thanos-io/thanos/pkg/extprom"
    30  	extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http"
    31  	"github.com/thanos-io/thanos/pkg/logging"
    32  	"github.com/thanos-io/thanos/pkg/prober"
    33  	"github.com/thanos-io/thanos/pkg/queryfrontend"
    34  	httpserver "github.com/thanos-io/thanos/pkg/server/http"
    35  	"github.com/thanos-io/thanos/pkg/server/http/middleware"
    36  	"github.com/thanos-io/thanos/pkg/tracing"
    37  )
    38  
    39  type queryFrontendConfig struct {
    40  	http           httpConfig
    41  	webDisableCORS bool
    42  	queryfrontend.Config
    43  	orgIdHeaders []string
    44  }
    45  
    46  func registerQueryFrontend(app *extkingpin.App) {
    47  	comp := component.QueryFrontend
    48  	cmd := app.Command(comp.String(), "Query frontend command implements a service deployed in front of queriers to improve query parallelization and caching.")
    49  	cfg := &queryFrontendConfig{
    50  		Config: queryfrontend.Config{
    51  			// Max body size is 10 MiB.
    52  			CortexHandlerConfig: &transport.HandlerConfig{
    53  				MaxBodySize: 10 * 1024 * 1024,
    54  			},
    55  			QueryRangeConfig: queryfrontend.QueryRangeConfig{
    56  				Limits: &cortexvalidation.Limits{},
    57  			},
    58  			LabelsConfig: queryfrontend.LabelsConfig{
    59  				Limits: &cortexvalidation.Limits{},
    60  			},
    61  		},
    62  	}
    63  
    64  	cfg.http.registerFlag(cmd)
    65  
    66  	cmd.Flag("web.disable-cors", "Whether to disable CORS headers to be set by Thanos. By default Thanos sets CORS headers to be allowed by all.").
    67  		Default("false").BoolVar(&cfg.webDisableCORS)
    68  
    69  	// Query range tripperware flags.
    70  	cmd.Flag("query-range.align-range-with-step", "Mutate incoming queries to align their start and end with their step for better cache-ability. Note: Grafana dashboards do that by default.").
    71  		Default("true").BoolVar(&cfg.QueryRangeConfig.AlignRangeWithStep)
    72  
    73  	cmd.Flag("query-range.request-downsampled", "Make additional query for downsampled data in case of empty or incomplete response to range request.").
    74  		Default("true").BoolVar(&cfg.QueryRangeConfig.RequestDownsampled)
    75  
    76  	cmd.Flag("query-range.split-interval", "Split query range requests by an interval and execute in parallel, it should be greater than 0 when query-range.response-cache-config is configured.").
    77  		Default("24h").DurationVar(&cfg.QueryRangeConfig.SplitQueriesByInterval)
    78  
    79  	cmd.Flag("query-range.min-split-interval", "Split query range requests above this interval in query-range.horizontal-shards requests of equal range. "+
    80  		"Using this parameter is not allowed with query-range.split-interval. "+
    81  		"One should also set query-range.split-min-horizontal-shards to a value greater than 1 to enable splitting.").
    82  		Default("0").DurationVar(&cfg.QueryRangeConfig.MinQuerySplitInterval)
    83  
    84  	cmd.Flag("query-range.max-split-interval", "Split query range below this interval in query-range.horizontal-shards. Queries with a range longer than this value will be split in multiple requests of this length.").
    85  		Default("0").DurationVar(&cfg.QueryRangeConfig.MaxQuerySplitInterval)
    86  
    87  	cmd.Flag("query-range.horizontal-shards", "Split queries in this many requests when query duration is below query-range.max-split-interval.").
    88  		Default("0").Int64Var(&cfg.QueryRangeConfig.HorizontalShards)
    89  
    90  	cmd.Flag("query-range.max-retries-per-request", "Maximum number of retries for a single query range request; beyond this, the downstream error is returned.").
    91  		Default("5").IntVar(&cfg.QueryRangeConfig.MaxRetries)
    92  
    93  	cmd.Flag("query-range.max-query-length", "Limit the query time range (end - start time) in the query-frontend, 0 disables it.").
    94  		Default("0").DurationVar((*time.Duration)(&cfg.QueryRangeConfig.Limits.MaxQueryLength))
    95  
    96  	cmd.Flag("query-range.max-query-parallelism", "Maximum number of query range requests will be scheduled in parallel by the Frontend.").
    97  		Default("14").IntVar(&cfg.QueryRangeConfig.Limits.MaxQueryParallelism)
    98  
    99  	cmd.Flag("query-range.response-cache-max-freshness", "Most recent allowed cacheable result for query range requests, to prevent caching very recent results that might still be in flux.").
   100  		Default("1m").DurationVar((*time.Duration)(&cfg.QueryRangeConfig.Limits.MaxCacheFreshness))
   101  
   102  	cmd.Flag("query-range.partial-response", "Enable partial response for query range requests if no partial_response param is specified. --no-query-range.partial-response for disabling.").
   103  		Default("true").BoolVar(&cfg.QueryRangeConfig.PartialResponseStrategy)
   104  
   105  	cfg.QueryRangeConfig.CachePathOrContent = *extflag.RegisterPathOrContent(cmd, "query-range.response-cache-config", "YAML file that contains response cache configuration.", extflag.WithEnvSubstitution())
   106  
   107  	// Labels tripperware flags.
   108  	cmd.Flag("labels.split-interval", "Split labels requests by an interval and execute in parallel, it should be greater than 0 when labels.response-cache-config is configured.").
   109  		Default("24h").DurationVar(&cfg.LabelsConfig.SplitQueriesByInterval)
   110  
   111  	cmd.Flag("labels.max-retries-per-request", "Maximum number of retries for a single label/series API request; beyond this, the downstream error is returned.").
   112  		Default("5").IntVar(&cfg.LabelsConfig.MaxRetries)
   113  
   114  	cmd.Flag("labels.max-query-parallelism", "Maximum number of labels requests will be scheduled in parallel by the Frontend.").
   115  		Default("14").IntVar(&cfg.LabelsConfig.Limits.MaxQueryParallelism)
   116  
   117  	cmd.Flag("labels.response-cache-max-freshness", "Most recent allowed cacheable result for labels requests, to prevent caching very recent results that might still be in flux.").
   118  		Default("1m").DurationVar((*time.Duration)(&cfg.LabelsConfig.Limits.MaxCacheFreshness))
   119  
   120  	cmd.Flag("labels.partial-response", "Enable partial response for labels requests if no partial_response param is specified. --no-labels.partial-response for disabling.").
   121  		Default("true").BoolVar(&cfg.LabelsConfig.PartialResponseStrategy)
   122  
   123  	cmd.Flag("labels.default-time-range", "The default metadata time range duration for retrieving labels through Labels and Series API when the range parameters are not specified.").
   124  		Default("24h").DurationVar(&cfg.DefaultTimeRange)
   125  
   126  	cfg.LabelsConfig.CachePathOrContent = *extflag.RegisterPathOrContent(cmd, "labels.response-cache-config", "YAML file that contains response cache configuration.", extflag.WithEnvSubstitution())
   127  
   128  	cmd.Flag("cache-compression-type", "Use compression in results cache. Supported values are: 'snappy' and '' (disable compression).").
   129  		Default("").StringVar(&cfg.CacheCompression)
   130  
   131  	cmd.Flag("query-frontend.downstream-url", "URL of downstream Prometheus Query compatible API.").
   132  		Default("http://localhost:9090").StringVar(&cfg.DownstreamURL)
   133  
   134  	cfg.DownstreamTripperConfig.CachePathOrContent = *extflag.RegisterPathOrContent(cmd, "query-frontend.downstream-tripper-config", "YAML file that contains downstream tripper configuration. If your downstream URL is localhost or 127.0.0.1 then it is highly recommended to increase max_idle_conns_per_host to at least 100.", extflag.WithEnvSubstitution())
   135  
   136  	cmd.Flag("query-frontend.compress-responses", "Compress HTTP responses.").
   137  		Default("false").BoolVar(&cfg.CompressResponses)
   138  
   139  	cmd.Flag("query-frontend.log-queries-longer-than", "Log queries that are slower than the specified duration. "+
   140  		"Set to 0 to disable. Set to < 0 to enable on all queries.").Default("0").DurationVar(&cfg.CortexHandlerConfig.LogQueriesLongerThan)
   141  
   142  	cmd.Flag("query-frontend.org-id-header", "Request header names used to identify the source of slow queries (repeated flag). "+
   143  		"The values of the header will be added to the org id field in the slow query log. "+
   144  		"If multiple headers match the request, the first matching arg specified will take precedence. "+
   145  		"If no headers match 'anonymous' will be used.").PlaceHolder("<http-header-name>").StringsVar(&cfg.orgIdHeaders)
   146  
   147  	cmd.Flag("query-frontend.forward-header", "List of headers forwarded by the query-frontend to downstream queriers, default is empty").PlaceHolder("<http-header-name>").StringsVar(&cfg.ForwardHeaders)
   148  
   149  	cmd.Flag("query-frontend.vertical-shards", "Number of shards to use when distributing shardable PromQL queries. For more details, you can refer to the Vertical query sharding proposal: https://thanos.io/tip/proposals-accepted/202205-vertical-query-sharding.md").IntVar(&cfg.NumShards)
   150  
   151  	cmd.Flag("log.request.decision", "Deprecation Warning - This flag would be soon deprecated, and replaced with `request.logging-config`. Request Logging for logging the start and end of requests. By default this flag is disabled. LogFinishCall : Logs the finish call of the requests. LogStartAndFinishCall : Logs the start and finish call of the requests. NoLogCall : Disable request logging.").Default("").EnumVar(&cfg.RequestLoggingDecision, "NoLogCall", "LogFinishCall", "LogStartAndFinishCall", "")
   152  	reqLogConfig := extkingpin.RegisterRequestLoggingFlags(cmd)
   153  
   154  	cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error {
   155  		httpLogOpts, err := logging.ParseHTTPOptions(cfg.RequestLoggingDecision, reqLogConfig)
   156  		if err != nil {
   157  			return errors.Wrap(err, "error while parsing config for request logging")
   158  		}
   159  
   160  		return runQueryFrontend(g, logger, reg, tracer, httpLogOpts, cfg, comp)
   161  	})
   162  }
   163  
   164  func parseTransportConfiguration(downstreamTripperConfContentYaml []byte) (*http.Transport, error) {
   165  	downstreamTripper := &http.Transport{
   166  		Proxy: http.ProxyFromEnvironment,
   167  		DialContext: (&net.Dialer{
   168  			Timeout:   30 * time.Second,
   169  			KeepAlive: 30 * time.Second,
   170  			DualStack: true,
   171  		}).DialContext,
   172  		ForceAttemptHTTP2:     true,
   173  		MaxIdleConns:          100,
   174  		IdleConnTimeout:       90 * time.Second,
   175  		TLSHandshakeTimeout:   10 * time.Second,
   176  		ExpectContinueTimeout: 1 * time.Second,
   177  	}
   178  
   179  	if len(downstreamTripperConfContentYaml) > 0 {
   180  		tripperConfig := &queryfrontend.DownstreamTripperConfig{}
   181  		if err := yaml.UnmarshalStrict(downstreamTripperConfContentYaml, tripperConfig); err != nil {
   182  			return nil, errors.Wrap(err, "parsing downstream tripper config YAML file")
   183  		}
   184  
   185  		if tripperConfig.IdleConnTimeout > 0 {
   186  			downstreamTripper.IdleConnTimeout = time.Duration(tripperConfig.IdleConnTimeout)
   187  		}
   188  		if tripperConfig.ResponseHeaderTimeout > 0 {
   189  			downstreamTripper.ResponseHeaderTimeout = time.Duration(tripperConfig.ResponseHeaderTimeout)
   190  		}
   191  		if tripperConfig.TLSHandshakeTimeout > 0 {
   192  			downstreamTripper.TLSHandshakeTimeout = time.Duration(tripperConfig.TLSHandshakeTimeout)
   193  		}
   194  		if tripperConfig.ExpectContinueTimeout > 0 {
   195  			downstreamTripper.ExpectContinueTimeout = time.Duration(tripperConfig.ExpectContinueTimeout)
   196  		}
   197  		if tripperConfig.MaxIdleConns != nil {
   198  			downstreamTripper.MaxIdleConns = *tripperConfig.MaxIdleConns
   199  		}
   200  		if tripperConfig.MaxIdleConnsPerHost != nil {
   201  			downstreamTripper.MaxIdleConnsPerHost = *tripperConfig.MaxIdleConnsPerHost
   202  		}
   203  		if tripperConfig.MaxConnsPerHost != nil {
   204  			downstreamTripper.MaxConnsPerHost = *tripperConfig.MaxConnsPerHost
   205  		}
   206  	}
   207  
   208  	return downstreamTripper, nil
   209  }
   210  
   211  func runQueryFrontend(
   212  	g *run.Group,
   213  	logger log.Logger,
   214  	reg *prometheus.Registry,
   215  	tracer opentracing.Tracer,
   216  	httpLogOpts []logging.Option,
   217  	cfg *queryFrontendConfig,
   218  	comp component.Component,
   219  ) error {
   220  	queryRangeCacheConfContentYaml, err := cfg.QueryRangeConfig.CachePathOrContent.Content()
   221  	if err != nil {
   222  		return err
   223  	}
   224  	if len(queryRangeCacheConfContentYaml) > 0 {
   225  		cacheConfig, err := queryfrontend.NewCacheConfig(logger, queryRangeCacheConfContentYaml)
   226  		if err != nil {
   227  			return errors.Wrap(err, "initializing the query range cache config")
   228  		}
   229  		cfg.QueryRangeConfig.ResultsCacheConfig = &queryrange.ResultsCacheConfig{
   230  			Compression: cfg.CacheCompression,
   231  			CacheConfig: *cacheConfig,
   232  		}
   233  	}
   234  
   235  	labelsCacheConfContentYaml, err := cfg.LabelsConfig.CachePathOrContent.Content()
   236  	if err != nil {
   237  		return err
   238  	}
   239  	if len(labelsCacheConfContentYaml) > 0 {
   240  		cacheConfig, err := queryfrontend.NewCacheConfig(logger, labelsCacheConfContentYaml)
   241  		if err != nil {
   242  			return errors.Wrap(err, "initializing the labels cache config")
   243  		}
   244  		cfg.LabelsConfig.ResultsCacheConfig = &queryrange.ResultsCacheConfig{
   245  			Compression: cfg.CacheCompression,
   246  			CacheConfig: *cacheConfig,
   247  		}
   248  	}
   249  
   250  	if err := cfg.Validate(); err != nil {
   251  		return errors.Wrap(err, "error validating the config")
   252  	}
   253  
   254  	tripperWare, err := queryfrontend.NewTripperware(cfg.Config, reg, logger)
   255  	if err != nil {
   256  		return errors.Wrap(err, "setup tripperwares")
   257  	}
   258  
   259  	// Create a downstream roundtripper.
   260  	downstreamTripperConfContentYaml, err := cfg.DownstreamTripperConfig.CachePathOrContent.Content()
   261  	if err != nil {
   262  		return err
   263  	}
   264  	downstreamTripper, err := parseTransportConfiguration(downstreamTripperConfContentYaml)
   265  	if err != nil {
   266  		return err
   267  	}
   268  
   269  	roundTripper, err := cortexfrontend.NewDownstreamRoundTripper(cfg.DownstreamURL, downstreamTripper)
   270  	if err != nil {
   271  		return errors.Wrap(err, "setup downstream roundtripper")
   272  	}
   273  
   274  	// Wrap the downstream RoundTripper into query frontend Tripperware.
   275  	roundTripper = tripperWare(roundTripper)
   276  
   277  	// Create the query frontend transport.
   278  	handler := transport.NewHandler(*cfg.CortexHandlerConfig, roundTripper, logger, nil)
   279  	if cfg.CompressResponses {
   280  		handler = gzhttp.GzipHandler(handler)
   281  	}
   282  
   283  	httpProbe := prober.NewHTTP()
   284  	statusProber := prober.Combine(
   285  		httpProbe,
   286  		prober.NewInstrumentation(comp, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)),
   287  	)
   288  
   289  	// Configure Request Logging for HTTP calls.
   290  	logMiddleware := logging.NewHTTPServerMiddleware(logger, httpLogOpts...)
   291  	ins := extpromhttp.NewInstrumentationMiddleware(reg, nil)
   292  
   293  	// Start metrics HTTP server.
   294  	{
   295  		srv := httpserver.New(logger, reg, comp, httpProbe,
   296  			httpserver.WithListen(cfg.http.bindAddress),
   297  			httpserver.WithGracePeriod(time.Duration(cfg.http.gracePeriod)),
   298  			httpserver.WithTLSConfig(cfg.http.tlsConfig),
   299  		)
   300  
   301  		instr := func(f http.HandlerFunc) http.HandlerFunc {
   302  			hf := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   303  				orgId := extractOrgId(cfg, r)
   304  				name := "query-frontend"
   305  				if !cfg.webDisableCORS {
   306  					api.SetCORS(w)
   307  				}
   308  				tracing.HTTPMiddleware(
   309  					tracer,
   310  					name,
   311  					logger,
   312  					ins.NewHandler(
   313  						name,
   314  						gzhttp.GzipHandler(
   315  							middleware.RequestID(
   316  								logMiddleware.HTTPMiddleware(name, f),
   317  							),
   318  						),
   319  					),
   320  					// Cortex frontend middlewares require orgID.
   321  				).ServeHTTP(w, r.WithContext(user.InjectOrgID(r.Context(), orgId)))
   322  			})
   323  			return hf
   324  		}
   325  		srv.Handle("/", instr(handler.ServeHTTP))
   326  
   327  		g.Add(func() error {
   328  			statusProber.Healthy()
   329  
   330  			return srv.ListenAndServe()
   331  		}, func(err error) {
   332  			statusProber.NotReady(err)
   333  			defer statusProber.NotHealthy(err)
   334  
   335  			srv.Shutdown(err)
   336  		})
   337  	}
   338  
   339  	level.Info(logger).Log("msg", "starting query frontend")
   340  	statusProber.Ready()
   341  	return nil
   342  }
   343  
   344  func extractOrgId(conf *queryFrontendConfig, r *http.Request) string {
   345  	for _, header := range conf.orgIdHeaders {
   346  		headerVal := r.Header.Get(header)
   347  		if headerVal != "" {
   348  			return headerVal
   349  		}
   350  	}
   351  	return "anonymous"
   352  }