github.com/thanos-io/thanos@v0.32.5/cmd/thanos/receive.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package main

import (
	"context"
	"os"
	"path"
	"strings"
	"time"

	extflag "github.com/efficientgo/tools/extkingpin"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	grpc_logging "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/logging"
	"github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/tags"
	"github.com/oklog/run"
	"github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/relabel"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/wlog"
	"google.golang.org/grpc"
	"gopkg.in/yaml.v2"

	"github.com/thanos-io/objstore"
	"github.com/thanos-io/objstore/client"
	objstoretracing "github.com/thanos-io/objstore/tracing/opentracing"

	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/component"
	"github.com/thanos-io/thanos/pkg/exemplars"
	"github.com/thanos-io/thanos/pkg/extgrpc"
	"github.com/thanos-io/thanos/pkg/extgrpc/snappy"
	"github.com/thanos-io/thanos/pkg/extkingpin"
	"github.com/thanos-io/thanos/pkg/extprom"
	"github.com/thanos-io/thanos/pkg/info"
	"github.com/thanos-io/thanos/pkg/info/infopb"
	"github.com/thanos-io/thanos/pkg/logging"
	"github.com/thanos-io/thanos/pkg/prober"
	"github.com/thanos-io/thanos/pkg/receive"
	"github.com/thanos-io/thanos/pkg/runutil"
	grpcserver "github.com/thanos-io/thanos/pkg/server/grpc"
	httpserver "github.com/thanos-io/thanos/pkg/server/http"
	"github.com/thanos-io/thanos/pkg/store"
	"github.com/thanos-io/thanos/pkg/store/labelpb"
	"github.com/thanos-io/thanos/pkg/tenancy"
	"github.com/thanos-io/thanos/pkg/tls"
)

const compressionNone = "none"

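// For orientation, an illustrative invocation of a combined router+ingestor
// node (flag names are defined in registerFlag below; the addresses, paths,
// and hashring file here are placeholders, not defaults):
//
//	thanos receive \
//	  --tsdb.path=/var/thanos/receive \
//	  --label='receive_replica="0"' \
//	  --receive.local-endpoint=127.0.0.1:10901 \
//	  --receive.hashrings-file=/etc/thanos/hashrings.json \
//	  --objstore.config-file=/etc/thanos/bucket.yml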
func registerReceive(app *extkingpin.App) {
	cmd := app.Command(component.Receive.String(), "Accept Prometheus remote write API requests and write to local tsdb.")

	conf := &receiveConfig{}
	conf.registerFlag(cmd)

	cmd.Setup(func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, debugLogging bool) error {
		lset, err := parseFlagLabels(conf.labelStrs)
		if err != nil {
			return errors.Wrap(err, "parse labels")
		}

		if !model.LabelName.IsValid(model.LabelName(conf.tenantLabelName)) {
			return errors.Errorf("unsupported format for tenant label name, got %s", conf.tenantLabelName)
		}
		if len(lset) == 0 {
			return errors.New("no external labels configured for receive, uniquely identifying external labels must be configured (ideally with `receive_` prefix); see https://thanos.io/tip/thanos/storage.md#external-labels for details.")
		}

		tagOpts, grpcLogOpts, err := logging.ParsegRPCOptions("", conf.reqLogConfig)
		if err != nil {
			return errors.Wrap(err, "error while parsing config for request logging")
		}

		tsdbOpts := &tsdb.Options{
			MinBlockDuration:               int64(time.Duration(*conf.tsdbMinBlockDuration) / time.Millisecond),
			MaxBlockDuration:               int64(time.Duration(*conf.tsdbMaxBlockDuration) / time.Millisecond),
			RetentionDuration:              int64(time.Duration(*conf.retention) / time.Millisecond),
			OutOfOrderTimeWindow:           int64(time.Duration(*conf.tsdbOutOfOrderTimeWindow) / time.Millisecond),
			OutOfOrderCapMax:               conf.tsdbOutOfOrderCapMax,
			NoLockfile:                     conf.noLockFile,
			WALCompression:                 wlog.ParseCompressionType(conf.walCompression, string(wlog.CompressionSnappy)),
			MaxExemplars:                   conf.tsdbMaxExemplars,
			EnableExemplarStorage:          conf.tsdbMaxExemplars > 0,
			HeadChunksWriteQueueSize:       int(conf.tsdbWriteQueueSize),
			EnableMemorySnapshotOnShutdown: conf.tsdbMemorySnapshotOnShutdown,
			EnableNativeHistograms:         conf.tsdbEnableNativeHistograms,
		}
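		// Note: the flag values above are model.Duration; Prometheus TSDB
		// options expect millisecond int64s, hence the /time.Millisecond
		// conversions (e.g. the default 2h block duration becomes 7200000).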

		// Are we running in IngestorOnly, RouterOnly or RouterIngestor mode?
		receiveMode := conf.determineMode()

		return runReceive(
			g,
			logger,
			debugLogging,
			reg,
			tracer,
			grpcLogOpts, tagOpts,
			tsdbOpts,
			lset,
			component.Receive,
			metadata.HashFunc(conf.hashFunc),
			receiveMode,
			conf,
		)
	})
}

func runReceive(
	g *run.Group,
	logger log.Logger,
	debugLogging bool,
	reg *prometheus.Registry,
	tracer opentracing.Tracer,
	grpcLogOpts []grpc_logging.Option,
	tagOpts []tags.Option,
	tsdbOpts *tsdb.Options,
	lset labels.Labels,
	comp component.SourceStoreAPI,
	hashFunc metadata.HashFunc,
	receiveMode receive.ReceiverMode,
	conf *receiveConfig,
) error {
	logger = log.With(logger, "component", "receive")

	level.Info(logger).Log("mode", receiveMode, "msg", "running receive")

	rwTLSConfig, err := tls.NewServerConfig(log.With(logger, "protocol", "HTTP"), conf.rwServerCert, conf.rwServerKey, conf.rwServerClientCA)
	if err != nil {
		return err
	}

	dialOpts, err := extgrpc.StoreClientGRPCOpts(
		logger,
		reg,
		tracer,
		conf.grpcConfig.tlsSrvCert != "",
		conf.grpcConfig.tlsSrvClientCA == "",
		conf.rwClientCert,
		conf.rwClientKey,
		conf.rwClientServerCA,
		conf.rwClientServerName,
	)
	if err != nil {
		return err
	}
	if conf.compression != compressionNone {
		dialOpts = append(dialOpts, grpc.WithDefaultCallOptions(grpc.UseCompressor(conf.compression)))
	}

	var bkt objstore.Bucket
	confContentYaml, err := conf.objStoreConfig.Content()
	if err != nil {
		return err
	}

	// Has this thanos receive instance been configured to ingest metrics into a local TSDB?
	enableIngestion := receiveMode == receive.IngestorOnly || receiveMode == receive.RouterIngestor

	upload := len(confContentYaml) > 0
	if enableIngestion {
		if upload {
			if tsdbOpts.MinBlockDuration != tsdbOpts.MaxBlockDuration {
				if !conf.ignoreBlockSize {
					return errors.Errorf("found that TSDB Max time is %d and Min time is %d. "+
						"Compaction needs to be disabled (tsdb.min-block-duration = tsdb.max-block-duration)", tsdbOpts.MaxBlockDuration, tsdbOpts.MinBlockDuration)
				}
				level.Warn(logger).Log("msg", "flag to ignore min/max block duration flags differing is being used. If the upload of a 2h block fails and a tsdb compaction happens that block may be missing from your Thanos bucket storage.")
			}
			// The background shipper continuously scans the data directory and uploads
			// new blocks to object storage service.
			bkt, err = client.NewBucket(logger, confContentYaml, comp.String())
			if err != nil {
				return err
			}
			bkt = objstoretracing.WrapWithTraces(objstore.WrapWithMetrics(bkt, extprom.WrapRegistererWithPrefix("thanos_", reg), bkt.Name()))
		} else {
			level.Info(logger).Log("msg", "no supported bucket was configured, uploads will be disabled")
		}
	}
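	// An illustrative --objstore.config (YAML) that enables uploads; the
	// type/config shape follows the thanos-io/objstore client format and the
	// values are placeholders:
	//
	//	type: S3
	//	config:
	//	  bucket: thanos-blocks
	//	  endpoint: s3.example.com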

	// TODO(brancz): remove after a couple of versions
	// Migrate non-multi-tsdb capable storage to multi-tsdb disk layout.
	if err := migrateLegacyStorage(logger, conf.dataDir, conf.defaultTenantID); err != nil {
		return errors.Wrapf(err, "migrate legacy storage in %v to default tenant %v", conf.dataDir, conf.defaultTenantID)
	}

	relabelContentYaml, err := conf.relabelConfigPath.Content()
	if err != nil {
		return errors.Wrap(err, "get content of relabel configuration")
	}
	var relabelConfig []*relabel.Config
	if err := yaml.Unmarshal(relabelContentYaml, &relabelConfig); err != nil {
		return errors.Wrap(err, "parse relabel configuration")
	}
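	// The relabel file is a plain list of Prometheus relabel_config entries,
	// e.g. (illustrative; the label name is a placeholder) dropping a
	// high-cardinality label before ingestion:
	//
	//	- action: labeldrop
	//	  regex: pod_ip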

	dbs := receive.NewMultiTSDB(
		conf.dataDir,
		logger,
		reg,
		tsdbOpts,
		lset,
		conf.tenantLabelName,
		bkt,
		conf.allowOutOfOrderUpload,
		hashFunc,
	)
	writer := receive.NewWriter(log.With(logger, "component", "receive-writer"), dbs, &receive.WriterOptions{
		Intern:                   conf.writerInterning,
		TooFarInFutureTimeWindow: int64(time.Duration(*conf.tsdbTooFarInFutureTimeWindow)),
	})

	var limitsConfig *receive.RootLimitsConfig
	if conf.writeLimitsConfig != nil {
		limitsContentYaml, err := conf.writeLimitsConfig.Content()
		if err != nil {
			return errors.Wrap(err, "get content of limit configuration")
		}
		limitsConfig, err = receive.ParseRootLimitConfig(limitsContentYaml)
		if err != nil {
			return errors.Wrap(err, "parse limit configuration")
		}
	}
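	// A sketch of the --receive.limits-config file parsed above; see the
	// Receive documentation for the authoritative schema, values here are
	// placeholders:
	//
	//	write:
	//	  global:
	//	    max_concurrency: 30
	//	  default:
	//	    request:
	//	      series_limit: 1000
	//	      samples_limit: 10000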
	limiter, err := receive.NewLimiter(conf.writeLimitsConfig, reg, receiveMode, log.With(logger, "component", "receive-limiter"), conf.limitsConfigReloadTimer)
	if err != nil {
		return errors.Wrap(err, "creating limiter")
	}

	webHandler := receive.NewHandler(log.With(logger, "component", "receive-handler"), &receive.Options{
		Writer:            writer,
		ListenAddress:     conf.rwAddress,
		Registry:          reg,
		Endpoint:          conf.endpoint,
		TenantHeader:      conf.tenantHeader,
		TenantField:       conf.tenantField,
		DefaultTenantID:   conf.defaultTenantID,
		ReplicaHeader:     conf.replicaHeader,
		ReplicationFactor: conf.replicationFactor,
		RelabelConfigs:    relabelConfig,
		ReceiverMode:      receiveMode,
		Tracer:            tracer,
		TLSConfig:         rwTLSConfig,
		DialOpts:          dialOpts,
		ForwardTimeout:    time.Duration(*conf.forwardTimeout),
		MaxBackoff:        time.Duration(*conf.maxBackoff),
		TSDBStats:         dbs,
		Limiter:           limiter,
	})
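	// The handler serves the remote-write endpoint on --remote-write.address
	// (default 0.0.0.0:19291). An illustrative Prometheus remote_write section
	// pointing at it (the host is a placeholder):
	//
	//	remote_write:
	//	  - url: http://thanos-receive.example.com:19291/api/v1/receive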

	grpcProbe := prober.NewGRPC()
	httpProbe := prober.NewHTTP()
	statusProber := prober.Combine(
		httpProbe,
		grpcProbe,
		prober.NewInstrumentation(comp, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)),
	)

	// Start all components while we wait for TSDB to open but only load
	// initial config and mark ourselves as ready after it completes.

	// hashringChangedChan signals when TSDB needs to be flushed and updated due to hashring config change.
	hashringChangedChan := make(chan struct{}, 1)

	if enableIngestion {
		// uploadC signals when new blocks should be uploaded.
		uploadC := make(chan struct{}, 1)
		// uploadDone signals when uploading has finished.
		uploadDone := make(chan struct{}, 1)

		level.Debug(logger).Log("msg", "setting up TSDB")
		{
			if err := startTSDBAndUpload(g, logger, reg, dbs, uploadC, hashringChangedChan, upload, uploadDone, statusProber, bkt, receive.HashringAlgorithm(conf.hashringsAlgorithm)); err != nil {
				return err
			}
		}
	}

	level.Debug(logger).Log("msg", "setting up hashring")
	{
		if err := setupHashring(g, logger, reg, conf, hashringChangedChan, webHandler, statusProber, enableIngestion, dbs); err != nil {
			return err
		}
	}

	level.Debug(logger).Log("msg", "setting up HTTP server")
	{
		srv := httpserver.New(logger, reg, comp, httpProbe,
			httpserver.WithListen(*conf.httpBindAddr),
			httpserver.WithGracePeriod(time.Duration(*conf.httpGracePeriod)),
			httpserver.WithTLSConfig(*conf.httpTLSConfig),
		)
		g.Add(func() error {
			statusProber.Healthy()
			return srv.ListenAndServe()
		}, func(err error) {
			statusProber.NotReady(err)
			defer statusProber.NotHealthy(err)

			srv.Shutdown(err)
		})
	}

	level.Debug(logger).Log("msg", "setting up gRPC server")
	{
		tlsCfg, err := tls.NewServerConfig(log.With(logger, "protocol", "gRPC"), conf.grpcConfig.tlsSrvCert, conf.grpcConfig.tlsSrvKey, conf.grpcConfig.tlsSrvClientCA)
		if err != nil {
			return errors.Wrap(err, "setup gRPC server")
		}

		options := []store.ProxyStoreOption{}
		if debugLogging {
			options = append(options, store.WithProxyStoreDebugLogging())
		}

		proxy := store.NewProxyStore(
			logger,
			reg,
			dbs.TSDBLocalClients,
			comp,
			labels.Labels{},
			0,
			store.LazyRetrieval,
			options...,
		)
		mts := store.NewLimitedStoreServer(store.NewInstrumentedStoreServer(reg, proxy), reg, conf.storeRateLimits)
		rw := store.ReadWriteTSDBStore{
			StoreServer:          mts,
			WriteableStoreServer: webHandler,
		}

		infoSrv := info.NewInfoServer(
			component.Receive.String(),
			info.WithLabelSetFunc(func() []labelpb.ZLabelSet { return proxy.LabelSet() }),
			info.WithStoreInfoFunc(func() *infopb.StoreInfo {
				if httpProbe.IsReady() {
					minTime, maxTime := proxy.TimeRange()
					return &infopb.StoreInfo{
						MinTime:                      minTime,
						MaxTime:                      maxTime,
						SupportsSharding:             true,
						SupportsWithoutReplicaLabels: true,
						TsdbInfos:                    proxy.TSDBInfos(),
					}
				}
				return nil
			}),
			info.WithExemplarsInfoFunc(),
		)

		srv := grpcserver.New(logger, receive.NewUnRegisterer(reg), tracer, grpcLogOpts, tagOpts, comp, grpcProbe,
			grpcserver.WithServer(store.RegisterStoreServer(rw, logger)),
			grpcserver.WithServer(store.RegisterWritableStoreServer(rw)),
			grpcserver.WithServer(exemplars.RegisterExemplarsServer(exemplars.NewMultiTSDB(dbs.TSDBExemplars))),
			grpcserver.WithServer(info.RegisterInfoServer(infoSrv)),
			grpcserver.WithListen(conf.grpcConfig.bindAddress),
			grpcserver.WithGracePeriod(conf.grpcConfig.gracePeriod),
			grpcserver.WithMaxConnAge(conf.grpcConfig.maxConnectionAge),
			grpcserver.WithTLSConfig(tlsCfg),
		)

		g.Add(
			func() error {
				level.Info(logger).Log("msg", "listening for StoreAPI and WritableStoreAPI gRPC", "address", conf.grpcConfig.bindAddress)
				statusProber.Healthy()
				return srv.ListenAndServe()
			},
			func(err error) {
				statusProber.NotReady(err)
				defer statusProber.NotHealthy(err)

				srv.Shutdown(err)
			},
		)
	}

	level.Debug(logger).Log("msg", "setting up receive HTTP handler")
	{
		g.Add(
			func() error {
				return errors.Wrap(webHandler.Run(), "error starting web server")
			},
			func(err error) {
				webHandler.Close()
			},
		)
	}

	if limitsConfig.AreHeadSeriesLimitsConfigured() {
		level.Info(logger).Log("msg", "setting up periodic (every 15s) meta-monitoring query for limiting cache")
		{
			ctx, cancel := context.WithCancel(context.Background())
			g.Add(func() error {
				return runutil.Repeat(15*time.Second, ctx.Done(), func() error {
					if err := limiter.HeadSeriesLimiter.QueryMetaMonitoring(ctx); err != nil {
						level.Error(logger).Log("msg", "failed to query meta-monitoring", "err", err.Error())
					}
					return nil
				})
			}, func(err error) {
				cancel()
			})
		}
	}

	level.Debug(logger).Log("msg", "setting up periodic tenant pruning")
	{
		ctx, cancel := context.WithCancel(context.Background())
		g.Add(func() error {
			return runutil.Repeat(2*time.Hour, ctx.Done(), func() error {
				if err := dbs.Prune(ctx); err != nil {
					level.Error(logger).Log("err", err)
				}
				return nil
			})
		}, func(err error) {
			cancel()
		})
	}

	{
		if limiter.CanReload() {
			ctx, cancel := context.WithCancel(context.Background())
			g.Add(func() error {
				level.Debug(logger).Log("msg", "limits config initialized with file watcher.")
				if err := limiter.StartConfigReloader(ctx); err != nil {
					return err
				}
				<-ctx.Done()
				return nil
			}, func(err error) {
				cancel()
			})
		}
	}

	level.Info(logger).Log("msg", "starting receiver")
	return nil
}

// setupHashring sets up the hashring configuration provided.
// If no hashring is provided, we set up a single-node hashring with the local endpoint.
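//
// For illustration, a hashring file (JSON) as consumed by the config watcher
// below; endpoint addresses and the tenant list are placeholders:
//
//	[
//	  {
//	    "hashring": "default",
//	    "endpoints": ["receive-0:10901", "receive-1:10901"],
//	    "tenants": []
//	  }
//	]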
func setupHashring(g *run.Group,
	logger log.Logger,
	reg *prometheus.Registry,
	conf *receiveConfig,
	hashringChangedChan chan struct{},
	webHandler *receive.Handler,
	statusProber prober.Probe,
	enableIngestion bool,
	dbs *receive.MultiTSDB,
) error {
	// Note: the hashring configuration watcher
	// is the sender and thus closes the chan.
	// In the single-node case, which has no configuration
	// watcher, we close the chan ourselves.
	updates := make(chan []receive.HashringConfig, 1)
	algorithm := receive.HashringAlgorithm(conf.hashringsAlgorithm)

	// If a hashring config file path is given, initialize the config watcher.
	if conf.hashringsFilePath != "" {
		cw, err := receive.NewConfigWatcher(log.With(logger, "component", "config-watcher"), reg, conf.hashringsFilePath, *conf.refreshInterval)
		if err != nil {
			return errors.Wrap(err, "failed to initialize config watcher")
		}

		// Check the hashring configuration before running the watcher.
		if err := cw.ValidateConfig(); err != nil {
			cw.Stop()
			close(updates)
			return errors.Wrap(err, "failed to validate hashring configuration file")
		}

		ctx, cancel := context.WithCancel(context.Background())
		g.Add(func() error {
			return receive.ConfigFromWatcher(ctx, updates, cw)
		}, func(error) {
			cancel()
		})
	} else {
		var (
			cf  []receive.HashringConfig
			err error
		)
		// If hashring config file content is given, initialize the configuration from it.
		if len(conf.hashringsFileContent) > 0 {
			cf, err = receive.ParseConfig([]byte(conf.hashringsFileContent))
			if err != nil {
				close(updates)
				return errors.Wrap(err, "failed to validate hashring configuration content")
			}
		}

		cancel := make(chan struct{})
		g.Add(func() error {
			defer close(updates)
			updates <- cf
			<-cancel
			return nil
		}, func(error) {
			close(cancel)
		})
	}

	cancel := make(chan struct{})
	g.Add(func() error {

		if enableIngestion {
			defer close(hashringChangedChan)
		}

		for {
			select {
			case c, ok := <-updates:
				if !ok {
					return nil
				}

				if c == nil {
					webHandler.Hashring(receive.SingleNodeHashring(conf.endpoint))
					level.Info(logger).Log("msg", "Empty hashring config. Set up single node hashring.")
				} else {
					h, err := receive.NewMultiHashring(algorithm, conf.replicationFactor, c)
					if err != nil {
						return errors.Wrap(err, "unable to create new hashring from config")
					}
					webHandler.Hashring(h)
					level.Info(logger).Log("msg", "Set up hashring for the given hashring config.")
				}

				if err := dbs.SetHashringConfig(c); err != nil {
					return errors.Wrap(err, "failed to set hashring config in MultiTSDB")
				}

				// If ingestion is enabled, send a signal to TSDB to flush.
				if enableIngestion {
					hashringChangedChan <- struct{}{}
				} else {
					// If not, just signal we are ready (this is important during first hashring load)
					statusProber.Ready()
				}
			case <-cancel:
				return nil
			}
		}
	}, func(err error) {
		close(cancel)
	},
	)
	return nil
}

// startTSDBAndUpload starts the multi-TSDB and sets up the rungroup to flush the TSDB and reload on hashring change.
// It also uploads blocks to object storage, if upload is enabled.
func startTSDBAndUpload(g *run.Group,
	logger log.Logger,
	reg *prometheus.Registry,
	dbs *receive.MultiTSDB,
	uploadC chan struct{},
	hashringChangedChan chan struct{},
	upload bool,
	uploadDone chan struct{},
	statusProber prober.Probe,
	bkt objstore.Bucket,
	hashringAlgorithm receive.HashringAlgorithm,
) error {
	logger = log.With(logger, "component", "storage")
	dbUpdatesStarted := promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Name: "thanos_receive_multi_db_updates_attempted_total",
		Help: "Number of Multi DB attempted reloads with flush and potential upload due to hashring changes",
	})
	dbUpdatesCompleted := promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Name: "thanos_receive_multi_db_updates_completed_total",
		Help: "Number of Multi DB completed reloads with flush and potential upload due to hashring changes",
	})

	level.Debug(logger).Log("msg", "removing storage lock files if any")
	if err := dbs.RemoveLockFilesIfAny(); err != nil {
		return errors.Wrap(err, "remove storage lock files")
	}

	// TSDBs reload logic, listening on hashring changes.
	cancel := make(chan struct{})
	g.Add(func() error {
		defer close(uploadC)

		// Before quitting, ensure the WAL is flushed and the DBs are closed.
		defer func() {
			level.Info(logger).Log("msg", "shutting down storage")
			if err := dbs.Flush(); err != nil {
				level.Error(logger).Log("err", err, "msg", "failed to flush storage")
			} else {
				level.Info(logger).Log("msg", "storage is flushed successfully")
			}
			if err := dbs.Close(); err != nil {
				level.Error(logger).Log("err", err, "msg", "failed to close storage")
				return
			}
			level.Info(logger).Log("msg", "storage is closed")
		}()

		var initialized bool
		for {
			select {
			case <-cancel:
				return nil
			case _, ok := <-hashringChangedChan:
				if !ok {
					return nil
				}

				// When using Ketama as the hashring algorithm, there is no need to flush the TSDB head.
				// If new receivers were added to the hashring, existing receivers will not need to
				// ingest additional series.
				// If receivers are removed from the hashring, existing receivers will only need
				// to ingest a subset of the series that were assigned to the removed receivers.
				// As a result, changing the hashring produces no churn, hence no need to force
				// head compaction and upload.
				flushHead := !initialized || hashringAlgorithm != receive.AlgorithmKetama
				if flushHead {
					msg := "hashring has changed; server is not ready to receive requests"
					statusProber.NotReady(errors.New(msg))
					level.Info(logger).Log("msg", msg)

					level.Info(logger).Log("msg", "updating storage")
					dbUpdatesStarted.Inc()
					if err := dbs.Flush(); err != nil {
						return errors.Wrap(err, "flushing storage")
					}
					if err := dbs.Open(); err != nil {
						return errors.Wrap(err, "opening storage")
					}
					if upload {
						uploadC <- struct{}{}
						<-uploadDone
					}
					statusProber.Ready()
					level.Info(logger).Log("msg", "storage started, and server is ready to receive requests")
					dbUpdatesCompleted.Inc()
				}
				initialized = true
			}
		}
	}, func(err error) {
		close(cancel)
	})

	if upload {
		logger := log.With(logger, "component", "uploader")
		upload := func(ctx context.Context) error {
			level.Debug(logger).Log("msg", "upload phase starting")
			start := time.Now()

			uploaded, err := dbs.Sync(ctx)
			if err != nil {
				level.Warn(logger).Log("msg", "upload failed", "elapsed", time.Since(start), "err", err)
				return err
			}
			level.Debug(logger).Log("msg", "upload phase done", "uploaded", uploaded, "elapsed", time.Since(start))
			return nil
		}
		{
			level.Info(logger).Log("msg", "upload enabled, starting initial sync")
			if err := upload(context.Background()); err != nil {
				return errors.Wrap(err, "initial upload failed")
			}
			level.Info(logger).Log("msg", "initial sync done")
		}
		{
			ctx, cancel := context.WithCancel(context.Background())
			g.Add(func() error {
				// Ensure we clean up everything properly.
				defer func() {
					runutil.CloseWithLogOnErr(logger, bkt, "bucket client")
				}()

				// Before quitting, ensure all blocks are uploaded.
				defer func() {
					<-uploadC // Closed by storage routine when it's done.
					level.Info(logger).Log("msg", "uploading the final cut block before exiting")
					ctx, cancel := context.WithCancel(context.Background())
					uploaded, err := dbs.Sync(ctx)
					if err != nil {
						cancel()
						level.Error(logger).Log("msg", "the final upload failed", "err", err)
						return
					}
					cancel()
					level.Info(logger).Log("msg", "the final cut block was uploaded", "uploaded", uploaded)
				}()

				defer close(uploadDone)

				// Run the uploader in a loop.
				tick := time.NewTicker(30 * time.Second)
				defer tick.Stop()

				for {
					select {
					case <-ctx.Done():
						return nil
					case <-uploadC:
						// Upload on demand.
						if err := upload(ctx); err != nil {
							level.Error(logger).Log("msg", "on demand upload failed", "err", err)
						}
						uploadDone <- struct{}{}
					case <-tick.C:
						if err := upload(ctx); err != nil {
							level.Error(logger).Log("msg", "recurring upload failed", "err", err)
						}
					}
				}
			}, func(error) {
				cancel()
			})
		}
	}

	return nil
}

func migrateLegacyStorage(logger log.Logger, dataDir, defaultTenantID string) error {
	defaultTenantDataDir := path.Join(dataDir, defaultTenantID)

	if _, err := os.Stat(defaultTenantDataDir); !os.IsNotExist(err) {
		level.Info(logger).Log("msg", "default tenant data dir already present, not attempting to migrate storage")
		return nil
	}

	if _, err := os.Stat(dataDir); os.IsNotExist(err) {
		level.Info(logger).Log("msg", "no existing storage found, no data migration attempted")
		return nil
	}

	level.Info(logger).Log("msg", "found legacy storage, migrating to multi-tsdb layout with default tenant", "defaultTenantID", defaultTenantID)

	files, err := os.ReadDir(dataDir)
	if err != nil {
		return errors.Wrapf(err, "read legacy data dir: %v", dataDir)
	}

	if err := os.MkdirAll(defaultTenantDataDir, 0750); err != nil {
		return errors.Wrapf(err, "create default tenant data dir: %v", defaultTenantDataDir)
	}

	for _, f := range files {
		from := path.Join(dataDir, f.Name())
		to := path.Join(defaultTenantDataDir, f.Name())
		if err := os.Rename(from, to); err != nil {
			return errors.Wrapf(err, "migrate file from %v to %v", from, to)
		}
	}

	return nil
}
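
// For illustration, the migration above turns a legacy single-TSDB layout
//
//	data/{wal, chunks_head, <block ULID dirs>, ...}
//
// into the multi-tsdb layout keyed by tenant
//
//	data/<default-tenant-id>/{wal, chunks_head, <block ULID dirs>, ...}
//
// where the tenant directory name comes from --receive.default-tenant-id.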

type receiveConfig struct {
	httpBindAddr    *string
	httpGracePeriod *model.Duration
	httpTLSConfig   *string

	grpcConfig grpcConfig

	rwAddress          string
	rwServerCert       string
	rwServerKey        string
	rwServerClientCA   string
	rwClientCert       string
	rwClientKey        string
	rwClientServerCA   string
	rwClientServerName string

	dataDir   string
	labelStrs []string

	objStoreConfig *extflag.PathOrContent
	retention      *model.Duration

	hashringsFilePath    string
	hashringsFileContent string
	hashringsAlgorithm   string

	refreshInterval   *model.Duration
	endpoint          string
	tenantHeader      string
	tenantField       string
	tenantLabelName   string
	defaultTenantID   string
	replicaHeader     string
	replicationFactor uint64
	forwardTimeout    *model.Duration
	maxBackoff        *model.Duration
	compression       string

	tsdbMinBlockDuration         *model.Duration
	tsdbMaxBlockDuration         *model.Duration
	tsdbTooFarInFutureTimeWindow *model.Duration
	tsdbOutOfOrderTimeWindow     *model.Duration
	tsdbOutOfOrderCapMax         int64
	tsdbAllowOverlappingBlocks   bool
	tsdbMaxExemplars             int64
	tsdbWriteQueueSize           int64
	tsdbMemorySnapshotOnShutdown bool
	tsdbEnableNativeHistograms   bool

	walCompression  bool
	noLockFile      bool
	writerInterning bool

	hashFunc string

	ignoreBlockSize       bool
	allowOutOfOrderUpload bool

	reqLogConfig      *extflag.PathOrContent
	relabelConfigPath *extflag.PathOrContent

	writeLimitsConfig       *extflag.PathOrContent
	storeRateLimits         store.SeriesSelectLimits
	limitsConfigReloadTimer time.Duration
}

func (rc *receiveConfig) registerFlag(cmd extkingpin.FlagClause) {
	rc.httpBindAddr, rc.httpGracePeriod, rc.httpTLSConfig = extkingpin.RegisterHTTPFlags(cmd)
	rc.grpcConfig.registerFlag(cmd)
	rc.storeRateLimits.RegisterFlags(cmd)

	cmd.Flag("remote-write.address", "Address to listen on for remote write requests.").
		Default("0.0.0.0:19291").StringVar(&rc.rwAddress)

	cmd.Flag("remote-write.server-tls-cert", "TLS Certificate for HTTP server, leave blank to disable TLS.").Default("").StringVar(&rc.rwServerCert)

	cmd.Flag("remote-write.server-tls-key", "TLS Key for the HTTP server, leave blank to disable TLS.").Default("").StringVar(&rc.rwServerKey)

	cmd.Flag("remote-write.server-tls-client-ca", "TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert)").Default("").StringVar(&rc.rwServerClientCA)

	cmd.Flag("remote-write.client-tls-cert", "TLS Certificates to use to identify this client to the server.").Default("").StringVar(&rc.rwClientCert)

	cmd.Flag("remote-write.client-tls-key", "TLS Key for the client's certificate.").Default("").StringVar(&rc.rwClientKey)

	cmd.Flag("remote-write.client-tls-ca", "TLS CA Certificates to use to verify servers.").Default("").StringVar(&rc.rwClientServerCA)

	cmd.Flag("remote-write.client-server-name", "Server name to verify the hostname on the returned TLS certificates. See https://tools.ietf.org/html/rfc4366#section-3.1").Default("").StringVar(&rc.rwClientServerName)

	cmd.Flag("tsdb.path", "Data directory of TSDB.").
		Default("./data").StringVar(&rc.dataDir)

	cmd.Flag("label", "External labels to announce. This flag will be removed in the future when handling multiple tsdb instances is added.").PlaceHolder("key=\"value\"").StringsVar(&rc.labelStrs)

	rc.objStoreConfig = extkingpin.RegisterCommonObjStoreFlags(cmd, "", false)

	rc.retention = extkingpin.ModelDuration(cmd.Flag("tsdb.retention", "How long to retain raw samples on local storage. 0d - disables the retention policy (i.e. infinite retention). For more details on how retention is enforced for individual tenants, please refer to the Tenant lifecycle management section in the Receive documentation: https://thanos.io/tip/components/receive.md/#tenant-lifecycle-management").Default("15d"))

	cmd.Flag("receive.hashrings-file", "Path to file that contains the hashring configuration. A watcher is initialized to watch changes and update the hashring dynamically.").PlaceHolder("<path>").StringVar(&rc.hashringsFilePath)

	cmd.Flag("receive.hashrings", "Alternative to 'receive.hashrings-file' flag (lower priority). Content of file that contains the hashring configuration.").PlaceHolder("<content>").StringVar(&rc.hashringsFileContent)

	hashringAlgorithmsHelptext := strings.Join([]string{string(receive.AlgorithmHashmod), string(receive.AlgorithmKetama)}, ", ")
	cmd.Flag("receive.hashrings-algorithm", "The algorithm used when distributing series in the hashrings. Must be one of "+hashringAlgorithmsHelptext+". Will be overwritten by the tenant-specific algorithm in the hashring config.").
		Default(string(receive.AlgorithmHashmod)).
		EnumVar(&rc.hashringsAlgorithm, string(receive.AlgorithmHashmod), string(receive.AlgorithmKetama))

	rc.refreshInterval = extkingpin.ModelDuration(cmd.Flag("receive.hashrings-file-refresh-interval", "Refresh interval to re-read the hashring configuration file. (used as a fallback)").
		Default("5m"))

	cmd.Flag("receive.local-endpoint", "Endpoint of local receive node. Used to identify the local node in the hashring configuration. If it's empty AND hashring configuration was provided, it means that receive will run in RoutingOnly mode.").StringVar(&rc.endpoint)

	cmd.Flag("receive.tenant-header", "HTTP header to determine tenant for write requests.").Default(tenancy.DefaultTenantHeader).StringVar(&rc.tenantHeader)

	cmd.Flag("receive.tenant-certificate-field", "Use TLS client's certificate field to determine tenant for write requests. Must be one of "+tenancy.CertificateFieldOrganization+", "+tenancy.CertificateFieldOrganizationalUnit+" or "+tenancy.CertificateFieldCommonName+". This setting will cause the receive.tenant-header flag value to be ignored.").Default("").EnumVar(&rc.tenantField, "", tenancy.CertificateFieldOrganization, tenancy.CertificateFieldOrganizationalUnit, tenancy.CertificateFieldCommonName)

	cmd.Flag("receive.default-tenant-id", "Default tenant ID to use when none is provided via a header.").Default(tenancy.DefaultTenant).StringVar(&rc.defaultTenantID)

	cmd.Flag("receive.tenant-label-name", "Label name through which the tenant will be announced.").Default(tenancy.DefaultTenantLabel).StringVar(&rc.tenantLabelName)

	cmd.Flag("receive.replica-header", "HTTP header specifying the replica number of a write request.").Default(receive.DefaultReplicaHeader).StringVar(&rc.replicaHeader)

	compressionOptions := strings.Join([]string{snappy.Name, compressionNone}, ", ")
	cmd.Flag("receive.grpc-compression", "Compression algorithm to use for gRPC requests to other receivers. Must be one of: "+compressionOptions).Default(snappy.Name).EnumVar(&rc.compression, snappy.Name, compressionNone)

	cmd.Flag("receive.replication-factor", "How many times to replicate incoming write requests.").Default("1").Uint64Var(&rc.replicationFactor)

	rc.forwardTimeout = extkingpin.ModelDuration(cmd.Flag("receive-forward-timeout", "Timeout for each forward request.").Default("5s").Hidden())

	rc.maxBackoff = extkingpin.ModelDuration(cmd.Flag("receive-forward-max-backoff", "Maximum backoff for each forward fan-out request.").Default("5s").Hidden())

	rc.relabelConfigPath = extflag.RegisterPathOrContent(cmd, "receive.relabel-config", "YAML file that contains relabeling configuration.", extflag.WithEnvSubstitution())

	rc.tsdbMinBlockDuration = extkingpin.ModelDuration(cmd.Flag("tsdb.min-block-duration", "Min duration for local TSDB blocks").Default("2h").Hidden())

	rc.tsdbMaxBlockDuration = extkingpin.ModelDuration(cmd.Flag("tsdb.max-block-duration", "Max duration for local TSDB blocks").Default("2h").Hidden())

	rc.tsdbTooFarInFutureTimeWindow = extkingpin.ModelDuration(cmd.Flag("tsdb.too-far-in-future.time-window",
		"[EXPERIMENTAL] Configures the allowed time window for ingesting samples too far in the future. Disabled (0s) by default. "+
			"Please note that enabling this flag will reject samples ahead of the receiver's local NTP time plus the configured duration, e.g. due to clock skew in remote write clients.",
	).Default("0s"))

	rc.tsdbOutOfOrderTimeWindow = extkingpin.ModelDuration(cmd.Flag("tsdb.out-of-order.time-window",
		"[EXPERIMENTAL] Configures the allowed time window for ingestion of out-of-order samples. Disabled (0s) by default. "+
			"Please note that if you enable this option and use the compactor, make sure you have the --enable-vertical-compaction flag enabled, otherwise you risk a compactor halt.",
	).Default("0s").Hidden())

	cmd.Flag("tsdb.out-of-order.cap-max",
		"[EXPERIMENTAL] Configures the maximum capacity for out-of-order chunks (in samples). If set to <=0, default value 32 is assumed.",
	).Default("0").Hidden().Int64Var(&rc.tsdbOutOfOrderCapMax)

	cmd.Flag("tsdb.allow-overlapping-blocks", "Allow overlapping blocks, which in turn enables vertical compaction and vertical query merge. Does not do anything, enabled all the time.").Default("false").BoolVar(&rc.tsdbAllowOverlappingBlocks)

	cmd.Flag("tsdb.wal-compression", "Compress the tsdb WAL.").Default("true").BoolVar(&rc.walCompression)

	cmd.Flag("tsdb.no-lockfile", "Do not create lockfile in TSDB data directory. In any case, the lockfiles will be deleted on next startup.").Default("false").BoolVar(&rc.noLockFile)

	cmd.Flag("tsdb.max-exemplars",
		"Enables support for ingesting exemplars and sets the maximum number of exemplars that will be stored per tenant."+
			" In case the exemplar storage becomes full (number of stored exemplars becomes equal to max-exemplars),"+
			" ingesting a new exemplar will evict the oldest exemplar from storage. 0 (or less) value of this flag disables exemplars storage.").
		Default("0").Int64Var(&rc.tsdbMaxExemplars)

	cmd.Flag("tsdb.write-queue-size",
		"[EXPERIMENTAL] Enables configuring the size of the chunk write queue used in the head chunks mapper. "+
			"A queue size of zero (default) disables this feature entirely.").
		Default("0").Hidden().Int64Var(&rc.tsdbWriteQueueSize)

	cmd.Flag("tsdb.memory-snapshot-on-shutdown",
		"[EXPERIMENTAL] Enables feature to snapshot in-memory chunks on shutdown for faster restarts.").
		Default("false").Hidden().BoolVar(&rc.tsdbMemorySnapshotOnShutdown)

	cmd.Flag("tsdb.enable-native-histograms",
		"[EXPERIMENTAL] Enables the ingestion of native histograms.").
		Default("false").Hidden().BoolVar(&rc.tsdbEnableNativeHistograms)

	cmd.Flag("writer.intern",
		"[EXPERIMENTAL] Enables string interning in receive writer, for more optimized memory usage.").
		Default("false").Hidden().BoolVar(&rc.writerInterning)

	cmd.Flag("hash-func", "Specify which hash function to use when calculating the hashes of produced files. If no function is specified, hashes are not calculated. This permits avoiding downloading some files twice, albeit at some performance cost. Possible values are: \"\", \"SHA256\".").
		Default("").EnumVar(&rc.hashFunc, "SHA256", "")

	cmd.Flag("shipper.ignore-unequal-block-size", "If true receive will not require min and max block size flags to be set to the same value. Only use this if you want to keep long retention and compaction enabled, as in the worst case it can result in ~2h data loss for your Thanos bucket storage.").Default("false").Hidden().BoolVar(&rc.ignoreBlockSize)

	cmd.Flag("shipper.allow-out-of-order-uploads",
		"If true, shipper will skip failed block uploads in the given iteration and retry later. This means that some newer blocks might be uploaded sooner than older blocks. "+
			"This can trigger compaction without those blocks and as a result will create an overlap situation. Set it to true if you have vertical compaction enabled and wish to upload blocks as soon as possible without caring "+
			"about order.").
		Default("false").Hidden().BoolVar(&rc.allowOutOfOrderUpload)

	rc.reqLogConfig = extkingpin.RegisterRequestLoggingFlags(cmd)

	rc.writeLimitsConfig = extflag.RegisterPathOrContent(cmd, "receive.limits-config", "YAML file that contains limit configuration.", extflag.WithEnvSubstitution(), extflag.WithHidden())
	cmd.Flag("receive.limits-config-reload-timer", "Minimum amount of time to pass for the limit configuration to be reloaded. Helps to avoid excessive reloads.").
		Default("1s").Hidden().DurationVar(&rc.limitsConfigReloadTimer)
}

// determineMode returns the ReceiverMode that this receiver is configured to run in.
// This is used to configure this Receiver's forwarding and ingesting behavior at runtime.
func (rc *receiveConfig) determineMode() receive.ReceiverMode {
	// Has the user provided some kind of hashring configuration?
	hashringSpecified := rc.hashringsFileContent != "" || rc.hashringsFilePath != ""
	// Has the user specified the --receive.local-endpoint flag?
	localEndpointSpecified := rc.endpoint != ""

	switch {
	case hashringSpecified && localEndpointSpecified:
		return receive.RouterIngestor
	case hashringSpecified && !localEndpointSpecified:
		// Be careful - if the hashring contains an address that routes to itself and does not specify a local
		// endpoint - you've just created an infinite loop / fork bomb :)
		return receive.RouterOnly
	default:
		// hashring configuration has not been provided so we ingest all metrics locally.
		return receive.IngestorOnly
	}
}
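
// Illustrative mapping of the flag combinations handled above:
//
//	hashring config set, --receive.local-endpoint set  => RouterIngestor
//	hashring config set, no local endpoint             => RouterOnly
//	no hashring config                                 => IngestorOnly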