github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/ingester.go

     1  package ingester
     2  
     3  import (
     4  	"context"
     5  	"flag"
     6  	"fmt"
     7  	"net/http"
     8  	"os"
     9  	"path/filepath"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/go-kit/log/level"
    14  	"github.com/grafana/dskit/concurrency"
    15  	"github.com/grafana/dskit/modules"
    16  	"github.com/grafana/dskit/ring"
    17  	"github.com/grafana/dskit/services"
    18  	"github.com/grafana/dskit/tenant"
    19  	"github.com/pkg/errors"
    20  	"github.com/prometheus/client_golang/prometheus"
    21  	"github.com/prometheus/client_golang/prometheus/promauto"
    22  	"github.com/prometheus/common/model"
    23  	"github.com/prometheus/prometheus/model/labels"
    24  	"google.golang.org/grpc/health/grpc_health_v1"
    25  
    26  	"github.com/grafana/loki/pkg/chunkenc"
    27  	"github.com/grafana/loki/pkg/ingester/client"
    28  	"github.com/grafana/loki/pkg/ingester/index"
    29  	"github.com/grafana/loki/pkg/iter"
    30  	"github.com/grafana/loki/pkg/logproto"
    31  	"github.com/grafana/loki/pkg/logql"
    32  	"github.com/grafana/loki/pkg/logql/syntax"
    33  	"github.com/grafana/loki/pkg/logqlmodel/stats"
    34  	"github.com/grafana/loki/pkg/runtime"
    35  	"github.com/grafana/loki/pkg/storage"
    36  	"github.com/grafana/loki/pkg/storage/chunk"
    37  	"github.com/grafana/loki/pkg/storage/chunk/fetcher"
    38  	"github.com/grafana/loki/pkg/storage/config"
    39  	index_stats "github.com/grafana/loki/pkg/storage/stores/index/stats"
    40  	"github.com/grafana/loki/pkg/usagestats"
    41  	"github.com/grafana/loki/pkg/util"
    42  	errUtil "github.com/grafana/loki/pkg/util"
    43  	util_log "github.com/grafana/loki/pkg/util/log"
    44  	"github.com/grafana/loki/pkg/util/wal"
    45  	"github.com/grafana/loki/pkg/validation"
    46  )
    47  
    48  const (
    49  	// RingKey is the key under which we store the ingesters ring in the KVStore.
    50  	RingKey = "ring"
    51  )
    52  
    53  // ErrReadOnly is returned when the ingester is shutting down and a push was
    54  // attempted.
    55  var (
    56  	ErrReadOnly = errors.New("Ingester is shutting down")
    57  
    58  	flushQueueLength = promauto.NewGauge(prometheus.GaugeOpts{
    59  		Name: "cortex_ingester_flush_queue_length",
    60  		Help: "The total number of series pending in the flush queue.",
    61  	})
    62  	compressionStats   = usagestats.NewString("ingester_compression")
    63  	targetSizeStats    = usagestats.NewInt("ingester_target_size_bytes")
    64  	walStats           = usagestats.NewString("ingester_wal")
    65  	activeTenantsStats = usagestats.NewInt("ingester_active_tenants")
    66  )
    67  
    68  // Config for an ingester.
    69  type Config struct {
    70  	LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler,omitempty"`
    71  
    72  	// Config for transferring chunks.
    73  	MaxTransferRetries int `yaml:"max_transfer_retries,omitempty"`
    74  
    75  	ConcurrentFlushes   int               `yaml:"concurrent_flushes"`
    76  	FlushCheckPeriod    time.Duration     `yaml:"flush_check_period"`
    77  	FlushOpTimeout      time.Duration     `yaml:"flush_op_timeout"`
    78  	RetainPeriod        time.Duration     `yaml:"chunk_retain_period"`
    79  	MaxChunkIdle        time.Duration     `yaml:"chunk_idle_period"`
    80  	BlockSize           int               `yaml:"chunk_block_size"`
    81  	TargetChunkSize     int               `yaml:"chunk_target_size"`
    82  	ChunkEncoding       string            `yaml:"chunk_encoding"`
    83  	parsedEncoding      chunkenc.Encoding `yaml:"-"` // placeholder for validated encoding
    84  	MaxChunkAge         time.Duration     `yaml:"max_chunk_age"`
    85  	AutoForgetUnhealthy bool              `yaml:"autoforget_unhealthy"`
    86  
    87  	// Synchronization settings. Used to make sure that ingesters cut their chunks at the same moments.
    88  	SyncPeriod         time.Duration `yaml:"sync_period"`
    89  	SyncMinUtilization float64       `yaml:"sync_min_utilization"`
    90  
    91  	MaxReturnedErrors int `yaml:"max_returned_stream_errors"`
    92  
    93  	// For testing, you can override the address and ID of this ingester.
    94  	ingesterClientFactory func(cfg client.Config, addr string) (client.HealthAndIngesterClient, error)
    95  
    96  	QueryStore                  bool          `yaml:"-"`
    97  	QueryStoreMaxLookBackPeriod time.Duration `yaml:"query_store_max_look_back_period"`
    98  
    99  	WAL WALConfig `yaml:"wal,omitempty"`
   100  
   101  	ChunkFilterer chunk.RequestChunkFilterer `yaml:"-"`
   102  	// Optional wrapper that can be used to modify the behaviour of the ingester
   103  	Wrapper Wrapper `yaml:"-"`
   104  
   105  	IndexShards int `yaml:"index_shards"`
   106  
   107  	MaxDroppedStreams int `yaml:"max_dropped_streams"`
   108  }
   109  
   110  // RegisterFlags registers the flags.
   111  func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
   112  	cfg.LifecyclerConfig.RegisterFlags(f, util_log.Logger)
   113  	cfg.WAL.RegisterFlags(f)
   114  
   115  	f.IntVar(&cfg.MaxTransferRetries, "ingester.max-transfer-retries", 0, "Number of times to try and transfer chunks before falling back to flushing. If set to 0 or negative value, transfers are disabled.")
   116  	f.IntVar(&cfg.ConcurrentFlushes, "ingester.concurrent-flushes", 32, "")
   117  	f.DurationVar(&cfg.FlushCheckPeriod, "ingester.flush-check-period", 30*time.Second, "")
   118  	f.DurationVar(&cfg.FlushOpTimeout, "ingester.flush-op-timeout", 10*time.Minute, "")
   119  	f.DurationVar(&cfg.RetainPeriod, "ingester.chunks-retain-period", 0, "")
   120  	f.DurationVar(&cfg.MaxChunkIdle, "ingester.chunks-idle-period", 30*time.Minute, "")
   121  	f.IntVar(&cfg.BlockSize, "ingester.chunks-block-size", 256*1024, "")
   122  	f.IntVar(&cfg.TargetChunkSize, "ingester.chunk-target-size", 1572864, "") // 1.5 MB
   123  	f.StringVar(&cfg.ChunkEncoding, "ingester.chunk-encoding", chunkenc.EncGZIP.String(), fmt.Sprintf("The algorithm to use for compressing chunk. (%s)", chunkenc.SupportedEncoding()))
   124  	f.DurationVar(&cfg.SyncPeriod, "ingester.sync-period", 0, "How often to cut chunks to synchronize ingesters.")
   125  	f.Float64Var(&cfg.SyncMinUtilization, "ingester.sync-min-utilization", 0, "Minimum utilization of chunk when doing synchronization.")
   126  	f.IntVar(&cfg.MaxReturnedErrors, "ingester.max-ignored-stream-errors", 10, "Maximum number of ignored stream errors to return. 0 to return all errors.")
   127  	f.DurationVar(&cfg.MaxChunkAge, "ingester.max-chunk-age", 2*time.Hour, "Maximum chunk age before flushing.")
   128  	f.DurationVar(&cfg.QueryStoreMaxLookBackPeriod, "ingester.query-store-max-look-back-period", 0, "How far back should an ingester be allowed to query the store for data, for use only with boltdb-shipper/tsdb index and filesystem object store. -1 for infinite.")
   129  	f.BoolVar(&cfg.AutoForgetUnhealthy, "ingester.autoforget-unhealthy", false, "Enable to remove unhealthy ingesters from the ring after `ring.kvstore.heartbeat_timeout`")
   130  	f.IntVar(&cfg.IndexShards, "ingester.index-shards", index.DefaultIndexShards, "Shard factor used in the ingesters for the in process reverse index. This MUST be evenly divisible by ALL schema shard factors or Loki will not start.")
   131  	f.IntVar(&cfg.MaxDroppedStreams, "ingester.tailer.max-dropped-streams", 10, "Maximum number of dropped streams to keep in memory during tailing")
   132  }
   133  
   134  func (cfg *Config) Validate() error {
   135  	enc, err := chunkenc.ParseEncoding(cfg.ChunkEncoding)
   136  	if err != nil {
   137  		return err
   138  	}
   139  	cfg.parsedEncoding = enc
   140  
   141  	if err = cfg.WAL.Validate(); err != nil {
   142  		return err
   143  	}
   144  
   145  	if cfg.MaxTransferRetries > 0 && cfg.WAL.Enabled {
   146  		return errors.New("the use of the write ahead log (WAL) is incompatible with chunk transfers. It's recommended to use the WAL. Please set ingester.max-transfer-retries to 0 to disable transfers")
   147  	}
   148  
   149  	if cfg.IndexShards <= 0 {
   150  		return fmt.Errorf("invalid ingester index shard factor: %d", cfg.IndexShards)
   151  	}
   152  
   153  	return nil
   154  }
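
        // A minimal usage sketch (illustrative only, not part of this package's API):
        // registering the ingester flags on a flag set and validating the parsed
        // configuration. The encoding value is hypothetical; anything listed by
        // chunkenc.SupportedEncoding() would do.
        //
        //	var cfg Config
        //	fs := flag.NewFlagSet("ingester", flag.ContinueOnError)
        //	cfg.RegisterFlags(fs)
        //	if err := fs.Parse([]string{"-ingester.chunk-encoding=snappy"}); err != nil {
        //		// handle the flag parsing error
        //	}
        //	if err := cfg.Validate(); err != nil {
        //		// e.g. unknown chunk encoding or invalid index shard factor
        //	}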
   155  
   156  type Wrapper interface {
   157  	Wrap(wrapped Interface) Interface
   158  }
   159  
   160  // ChunkStore is the interface we need to store chunks.
   161  type ChunkStore interface {
   162  	Put(ctx context.Context, chunks []chunk.Chunk) error
   163  	SelectLogs(ctx context.Context, req logql.SelectLogParams) (iter.EntryIterator, error)
   164  	SelectSamples(ctx context.Context, req logql.SelectSampleParams) (iter.SampleIterator, error)
   165  	GetChunkRefs(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([][]chunk.Chunk, []*fetcher.Fetcher, error)
   166  	GetSchemaConfigs() []config.PeriodConfig
   167  	Stats(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) (*index_stats.Stats, error)
   168  }
   169  
   170  // Interface is an interface for the Ingester
   171  type Interface interface {
   172  	services.Service
   173  
   174  	logproto.IngesterServer
   175  	logproto.PusherServer
   176  	logproto.QuerierServer
   177  	CheckReady(ctx context.Context) error
   178  	FlushHandler(w http.ResponseWriter, _ *http.Request)
   179  	GetOrCreateInstance(instanceID string) (*instance, error)
   180  	// Deprecated: use ShutdownHandler instead.
   181  	LegacyShutdownHandler(w http.ResponseWriter, r *http.Request)
   182  	ShutdownHandler(w http.ResponseWriter, r *http.Request)
   183  }
   184  
   185  // Ingester builds chunks for incoming log streams.
   186  type Ingester struct {
   187  	services.Service
   188  
   189  	cfg           Config
   190  	clientConfig  client.Config
   191  	tenantConfigs *runtime.TenantConfigs
   192  
   193  	shutdownMtx  sync.Mutex // Allows processes to grab a lock and prevent a shutdown
   194  	instancesMtx sync.RWMutex
   195  	instances    map[string]*instance
   196  	readonly     bool
   197  
   198  	lifecycler        *ring.Lifecycler
   199  	lifecyclerWatcher *services.FailureWatcher
   200  
   201  	store           ChunkStore
   202  	periodicConfigs []config.PeriodConfig
   203  
   204  	loopDone    sync.WaitGroup
   205  	loopQuit    chan struct{}
   206  	tailersQuit chan struct{}
   207  
   208  	// One queue per flush thread.  Fingerprint is used to
   209  	// pick a queue.
   210  	flushQueues     []*util.PriorityQueue
   211  	flushQueuesDone sync.WaitGroup
   212  
   213  	limiter *Limiter
   214  
   215  	// Denotes whether the ingester should flush on shutdown.
   216  	// Currently only used by the WAL to signal when the disk is full.
   217  	flushOnShutdownSwitch *OnceSwitch
   218  	// Flag for whether stopping the ingester service should also terminate the
   219  	// loki process.
   220  	// This is set when calling the shutdown handler.
   221  	terminateOnShutdown bool
   222  
   223  	// Only used by WAL & flusher to coordinate backpressure during replay.
   224  	replayController *replayController
   225  
   226  	metrics *ingesterMetrics
   227  
   228  	wal WAL
   229  
   230  	chunkFilter chunk.RequestChunkFilterer
   231  }
   232  
   233  // New makes a new Ingester.
   234  func New(cfg Config, clientConfig client.Config, store ChunkStore, limits *validation.Overrides, configs *runtime.TenantConfigs, registerer prometheus.Registerer) (*Ingester, error) {
   235  	if cfg.ingesterClientFactory == nil {
   236  		cfg.ingesterClientFactory = client.New
   237  	}
   238  	compressionStats.Set(cfg.ChunkEncoding)
   239  	targetSizeStats.Set(int64(cfg.TargetChunkSize))
   240  	walStats.Set("disabled")
   241  	if cfg.WAL.Enabled {
   242  		walStats.Set("enabled")
   243  	}
   244  	metrics := newIngesterMetrics(registerer)
   245  
   246  	i := &Ingester{
   247  		cfg:                   cfg,
   248  		clientConfig:          clientConfig,
   249  		tenantConfigs:         configs,
   250  		instances:             map[string]*instance{},
   251  		store:                 store,
   252  		periodicConfigs:       store.GetSchemaConfigs(),
   253  		loopQuit:              make(chan struct{}),
   254  		flushQueues:           make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
   255  		tailersQuit:           make(chan struct{}),
   256  		metrics:               metrics,
   257  		flushOnShutdownSwitch: &OnceSwitch{},
   258  		terminateOnShutdown:   false,
   259  	}
   260  	i.replayController = newReplayController(metrics, cfg.WAL, &replayFlusher{i})
   261  
   262  	if cfg.WAL.Enabled {
   263  		if err := os.MkdirAll(cfg.WAL.Dir, os.ModePerm); err != nil {
   264  			// Best effort try to make path absolute for easier debugging.
   265  			path, _ := filepath.Abs(cfg.WAL.Dir)
   266  			if path == "" {
   267  				path = cfg.WAL.Dir
   268  			}
   269  
   270  			return nil, fmt.Errorf("creating WAL folder at %q: %w", path, err)
   271  		}
   272  	}
   273  
   274  	wal, err := newWAL(cfg.WAL, registerer, metrics, newIngesterSeriesIter(i))
   275  	if err != nil {
   276  		return nil, err
   277  	}
   278  	i.wal = wal
   279  
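        	// Flush chunks on shutdown when the WAL is disabled; with the WAL enabled, flush only if WAL.FlushOnShutdown is set, since unflushed data can be replayed from the WAL on restart.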
   280  	i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, "ingester", RingKey, !cfg.WAL.Enabled || cfg.WAL.FlushOnShutdown, util_log.Logger, prometheus.WrapRegistererWithPrefix("cortex_", registerer))
   281  	if err != nil {
   282  		return nil, err
   283  	}
   284  
   285  	i.lifecyclerWatcher = services.NewFailureWatcher()
   286  	i.lifecyclerWatcher.WatchService(i.lifecycler)
   287  
   288  	// Now that the lifecycler has been created, we can create the limiter
   289  	// which depends on it.
   290  	i.limiter = NewLimiter(limits, metrics, i.lifecycler, cfg.LifecyclerConfig.RingConfig.ReplicationFactor)
   291  
   292  	i.Service = services.NewBasicService(i.starting, i.running, i.stopping)
   293  
   294  	i.setupAutoForget()
   295  
   296  	if i.cfg.ChunkFilterer != nil {
   297  		i.SetChunkFilterer(i.cfg.ChunkFilterer)
   298  	}
   299  
   300  	return i, nil
   301  }
   302  
   303  func (i *Ingester) SetChunkFilterer(chunkFilter chunk.RequestChunkFilterer) {
   304  	i.chunkFilter = chunkFilter
   305  }
   306  
   307  // setupAutoForget watches the ring status when `AutoForgetUnhealthy` is enabled.
   308  // When enabled, unhealthy ingesters that exceed `ring.kvstore.heartbeat_timeout` are removed from the ring every `HeartbeatPeriod`.
   309  func (i *Ingester) setupAutoForget() {
   310  	if !i.cfg.AutoForgetUnhealthy {
   311  		return
   312  	}
   313  
   314  	go func() {
   315  		ctx := context.Background()
   316  		err := i.Service.AwaitRunning(ctx)
   317  		if err != nil {
   318  			level.Error(util_log.Logger).Log("msg", fmt.Sprintf("autoforget received error %s, autoforget is disabled", err.Error()))
   319  			return
   320  		}
   321  
   322  		level.Info(util_log.Logger).Log("msg", fmt.Sprintf("autoforget is enabled and will remove unhealthy instances from the ring after %v with no heartbeat", i.cfg.LifecyclerConfig.RingConfig.HeartbeatTimeout))
   323  
   324  		ticker := time.NewTicker(i.cfg.LifecyclerConfig.HeartbeatPeriod)
   325  		defer ticker.Stop()
   326  
   327  		var forgetList []string
   328  		for range ticker.C {
   329  			err := i.lifecycler.KVStore.CAS(ctx, RingKey, func(in interface{}) (out interface{}, retry bool, err error) {
   330  				forgetList = forgetList[:0]
   331  				if in == nil {
   332  					return nil, false, nil
   333  				}
   334  
   335  				ringDesc, ok := in.(*ring.Desc)
   336  				if !ok {
   337  					level.Warn(util_log.Logger).Log("msg", fmt.Sprintf("autoforget saw a KV store value that was not `ring.Desc`, got `%T`", in))
   338  					return nil, false, nil
   339  				}
   340  
   341  				for id, ingester := range ringDesc.Ingesters {
   342  					if !ingester.IsHealthy(ring.Reporting, i.cfg.LifecyclerConfig.RingConfig.HeartbeatTimeout, time.Now()) {
   343  						if i.lifecycler.ID == id {
   344  						level.Warn(util_log.Logger).Log("msg", fmt.Sprintf("autoforget has seen our ID `%s` as unhealthy in the ring, network may be partitioned, skip forgetting ingesters this round", id))
   345  							return nil, false, nil
   346  						}
   347  						forgetList = append(forgetList, id)
   348  					}
   349  				}
   350  
   351  				if len(forgetList) == len(ringDesc.Ingesters)-1 {
   352  					level.Warn(util_log.Logger).Log("msg", fmt.Sprintf("autoforget has seen %d unhealthy ingesters out of %d, network may be partitioned, skip forgetting ingesters this round", len(forgetList), len(ringDesc.Ingesters)))
   353  					forgetList = forgetList[:0]
   354  					return nil, false, nil
   355  				}
   356  
   357  				if len(forgetList) > 0 {
   358  					for _, id := range forgetList {
   359  						ringDesc.RemoveIngester(id)
   360  					}
   361  					return ringDesc, true, nil
   362  				}
   363  				return nil, false, nil
   364  			})
   365  			if err != nil {
   366  				level.Warn(util_log.Logger).Log("msg", err)
   367  				continue
   368  			}
   369  
   370  			for _, id := range forgetList {
   371  				level.Info(util_log.Logger).Log("msg", fmt.Sprintf("autoforget removed ingester %v from the ring because it was not healthy after %v", id, i.cfg.LifecyclerConfig.RingConfig.HeartbeatTimeout))
   372  			}
   373  			i.metrics.autoForgetUnhealthyIngestersTotal.Add(float64(len(forgetList)))
   374  		}
   375  	}()
   376  }
   377  
   378  func (i *Ingester) starting(ctx context.Context) error {
   379  	if i.cfg.WAL.Enabled {
   380  		start := time.Now()
   381  
   382  		// Ignore retain period during wal replay.
   383  		oldRetain := i.cfg.RetainPeriod
   384  		i.cfg.RetainPeriod = 0
   385  
   386  		// Disable the in process stream limit checks while replaying the WAL.
   387  		// It is re-enabled in the recover's Close() method.
   388  		i.limiter.DisableForWALReplay()
   389  
   390  		recoverer := newIngesterRecoverer(i)
   391  
   392  		i.metrics.walReplayActive.Set(1)
   393  
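        		// endReplay is wrapped in sync.Once so it is safe to both defer it below (covering early error returns) and call it explicitly once replay has finished.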
   394  		endReplay := func() func() {
   395  			var once sync.Once
   396  			return func() {
   397  				once.Do(func() {
   398  					level.Info(util_log.Logger).Log("msg", "closing recoverer")
   399  					recoverer.Close()
   400  
   401  					elapsed := time.Since(start)
   402  
   403  					i.metrics.walReplayActive.Set(0)
   404  					i.metrics.walReplayDuration.Set(elapsed.Seconds())
   405  					i.cfg.RetainPeriod = oldRetain
   406  					level.Info(util_log.Logger).Log("msg", "WAL recovery finished", "time", elapsed.String())
   407  				})
   408  			}
   409  		}()
   410  		defer endReplay()
   411  
   412  		level.Info(util_log.Logger).Log("msg", "recovering from checkpoint")
   413  		checkpointReader, checkpointCloser, err := newCheckpointReader(i.cfg.WAL.Dir)
   414  		if err != nil {
   415  			return err
   416  		}
   417  		defer checkpointCloser.Close()
   418  
   419  		checkpointRecoveryErr := RecoverCheckpoint(checkpointReader, recoverer)
   420  		if checkpointRecoveryErr != nil {
   421  			i.metrics.walCorruptionsTotal.WithLabelValues(walTypeCheckpoint).Inc()
   422  			level.Error(util_log.Logger).Log(
   423  				"msg",
   424  				`Recovered from checkpoint with errors. Some streams were likely not recovered due to WAL checkpoint file corruptions (or WAL file deletions while Loki is running). No administrator action is needed and data loss is only a possibility if more than (replication factor / 2 + 1) ingesters suffer from this.`,
   425  				"elapsed", time.Since(start).String(),
   426  			)
   427  		}
   428  		level.Info(util_log.Logger).Log(
   429  			"msg", "WAL checkpoint recovery finished",
   430  			"elapsed", time.Since(start).String(),
   431  			"errors", checkpointRecoveryErr != nil,
   432  		)
   433  
   434  		level.Info(util_log.Logger).Log("msg", "recovering from WAL")
   435  		segmentReader, segmentCloser, err := wal.NewWalReader(i.cfg.WAL.Dir, -1)
   436  		if err != nil {
   437  			return err
   438  		}
   439  		defer segmentCloser.Close()
   440  
   441  		segmentRecoveryErr := RecoverWAL(segmentReader, recoverer)
   442  		if segmentRecoveryErr != nil {
   443  			i.metrics.walCorruptionsTotal.WithLabelValues(walTypeSegment).Inc()
   444  			level.Error(util_log.Logger).Log(
   445  				"msg",
   446  				"Recovered from WAL segments with errors. Some streams and/or entries were likely not recovered due to WAL segment file corruptions (or WAL file deletions while Loki is running). No administrator action is needed and data loss is only a possibility if more than (replication factor / 2 + 1) ingesters suffer from this.",
   447  				"elapsed", time.Since(start).String(),
   448  			)
   449  		}
   450  		level.Info(util_log.Logger).Log(
   451  			"msg", "WAL segment recovery finished",
   452  			"elapsed", time.Since(start).String(),
   453  			"errors", segmentRecoveryErr != nil,
   454  		)
   455  
   456  		endReplay()
   457  
   458  		i.wal.Start()
   459  	}
   460  
   461  	i.InitFlushQueues()
   462  
   463  	// pass new context to lifecycler, so that it doesn't stop automatically when Ingester's service context is done
   464  	err := i.lifecycler.StartAsync(context.Background())
   465  	if err != nil {
   466  		return err
   467  	}
   468  
   469  	err = i.lifecycler.AwaitRunning(ctx)
   470  	if err != nil {
   471  		return err
   472  	}
   473  
   474  	// start our loop
   475  	i.loopDone.Add(1)
   476  	go i.loop()
   477  	return nil
   478  }
   479  
   480  func (i *Ingester) running(ctx context.Context) error {
   481  	var serviceError error
   482  	select {
   483  	// wait until service is asked to stop
   484  	case <-ctx.Done():
   485  	// stop
   486  	case err := <-i.lifecyclerWatcher.Chan():
   487  		serviceError = fmt.Errorf("lifecycler failed: %w", err)
   488  	}
   489  
   490  	// close tailers before stopping our loop
   491  	close(i.tailersQuit)
   492  	for _, instance := range i.getInstances() {
   493  		instance.closeTailers()
   494  	}
   495  
   496  	close(i.loopQuit)
   497  	i.loopDone.Wait()
   498  	return serviceError
   499  }
   500  
   501  // Called after running exits, when Ingester transitions to Stopping state.
   502  // At this point, loop no longer runs, but flushers are still running.
   503  func (i *Ingester) stopping(_ error) error {
   504  	i.stopIncomingRequests()
   505  	var errs errUtil.MultiError
   506  	errs.Add(i.wal.Stop())
   507  
   508  	if i.flushOnShutdownSwitch.Get() {
   509  		i.lifecycler.SetFlushOnShutdown(true)
   510  	}
   511  	errs.Add(services.StopAndAwaitTerminated(context.Background(), i.lifecycler))
   512  
   513  	// Normally, flushers are stopped via lifecycler (in transferOut), but if lifecycler fails,
   514  	// we better stop them.
   515  	for _, flushQueue := range i.flushQueues {
   516  		flushQueue.Close()
   517  	}
   518  	i.flushQueuesDone.Wait()
   519  
   520  	// In case the flag to terminate on shutdown is set we need to mark the
   521  	// ingester service as "failed", so Loki will shut down entirely.
   522  	// The module manager logs the failure `modules.ErrStopProcess` in a special way.
   523  	if i.terminateOnShutdown && errs.Err() == nil {
   524  		return modules.ErrStopProcess
   525  	}
   526  	return errs.Err()
   527  }
   528  
   529  func (i *Ingester) loop() {
   530  	defer i.loopDone.Done()
   531  
   532  	flushTicker := time.NewTicker(i.cfg.FlushCheckPeriod)
   533  	defer flushTicker.Stop()
   534  
   535  	for {
   536  		select {
   537  		case <-flushTicker.C:
   538  			i.sweepUsers(false, true)
   539  
   540  		case <-i.loopQuit:
   541  			return
   542  		}
   543  	}
   544  }
   545  
   546  // LegacyShutdownHandler triggers the following set of operations in order:
   547  //     * Change the state of ring to stop accepting writes.
   548  //     * Flush all the chunks.
   549  // Note: This handler does not trigger a termination of the Loki process,
   550  // despite its name. Instead, the ingester service is stopped, so an external
   551  // source can trigger a safe termination through a signal to the process.
   552  // The handler is deprecated and usage is discouraged. Use ShutdownHandler
   553  // instead.
   554  func (i *Ingester) LegacyShutdownHandler(w http.ResponseWriter, r *http.Request) {
   555  	level.Warn(util_log.Logger).Log("msg", "The handler /ingester/flush_shutdown is deprecated and usage is discouraged. Please use /ingester/shutdown?flush=true instead.")
   556  	originalState := i.lifecycler.FlushOnShutdown()
   557  	// We want to flush the chunks if transfer fails irrespective of original flag.
   558  	i.lifecycler.SetFlushOnShutdown(true)
   559  	_ = services.StopAndAwaitTerminated(context.Background(), i)
   560  	i.lifecycler.SetFlushOnShutdown(originalState)
   561  	w.WriteHeader(http.StatusNoContent)
   562  }
   563  
   564  // ShutdownHandler handles a graceful shutdown of the ingester service and
   565  // termination of the Loki process.
   566  func (i *Ingester) ShutdownHandler(w http.ResponseWriter, r *http.Request) {
   567  	// Don't allow calling the shutdown handler multiple times
   568  	if i.State() != services.Running {
   569  		w.WriteHeader(http.StatusServiceUnavailable)
   570  		_, _ = w.Write([]byte("Ingester is stopping or already stopped."))
   571  		return
   572  	}
   573  	params := r.URL.Query()
   574  	doFlush := util.FlagFromValues(params, "flush", true)
   575  	doDeleteRingTokens := util.FlagFromValues(params, "delete_ring_tokens", false)
   576  	doTerminate := util.FlagFromValues(params, "terminate", true)
   577  	err := i.handleShutdown(doTerminate, doFlush, doDeleteRingTokens)
   578  
   579  	// Stopping the module will return the modules.ErrStopProcess error. This is
   580  	// needed so the Loki process is shut down completely.
   581  	if err == nil || err == modules.ErrStopProcess {
   582  		w.WriteHeader(http.StatusNoContent)
   583  	} else {
   584  		w.WriteHeader(http.StatusInternalServerError)
   585  		_, _ = w.Write([]byte(err.Error()))
   586  	}
   587  }
   588  
   589  // handleShutdown triggers the following operations:
   590  //     * Change the state of ring to stop accepting writes.
   591  //     * optional: Flush all the chunks.
   592  //     * optional: Delete ring tokens file
   593  //     * Unregister from KV store
   594  //     * optional: Terminate process (handled by service manager in loki.go)
   595  func (i *Ingester) handleShutdown(terminate, flush, del bool) error {
   596  	i.lifecycler.SetFlushOnShutdown(flush)
   597  	i.lifecycler.SetClearTokensOnShutdown(del)
   598  	i.lifecycler.SetUnregisterOnShutdown(true)
   599  	i.terminateOnShutdown = terminate
   600  	return services.StopAndAwaitTerminated(context.Background(), i)
   601  }
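
        // Illustrative request (the path comes from the deprecation notice in
        // LegacyShutdownHandler; the parameter values shown are the handler defaults):
        //
        //	/ingester/shutdown?flush=true&delete_ring_tokens=false&terminate=true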
   602  
   603  // Push implements logproto.Pusher.
   604  func (i *Ingester) Push(ctx context.Context, req *logproto.PushRequest) (*logproto.PushResponse, error) {
   605  	instanceID, err := tenant.TenantID(ctx)
   606  	if err != nil {
   607  		return nil, err
   608  	} else if i.readonly {
   609  		return nil, ErrReadOnly
   610  	}
   611  
   612  	instance, err := i.GetOrCreateInstance(instanceID)
   613  	if err != nil {
   614  		return &logproto.PushResponse{}, err
   615  	}
   616  	err = instance.Push(ctx, req)
   617  	return &logproto.PushResponse{}, err
   618  }
   619  
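
        // Illustrative sketch (not part of this file): building a push request for a
        // single entry. `ing` is assumed to be an *Ingester returned by New, and the
        // tenant ID must already be present on ctx (Loki's auth middleware injects it);
        // the stream labels and log line are hypothetical.
        //
        //	req := &logproto.PushRequest{
        //		Streams: []logproto.Stream{{
        //			Labels:  `{job="example"}`,
        //			Entries: []logproto.Entry{{Timestamp: time.Now(), Line: "hello"}},
        //		}},
        //	}
        //	_, err := ing.Push(ctx, req)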
   620  func (i *Ingester) GetOrCreateInstance(instanceID string) (*instance, error) { //nolint:revive
   621  	inst, ok := i.getInstanceByID(instanceID)
   622  	if ok {
   623  		return inst, nil
   624  	}
   625  
   626  	i.instancesMtx.Lock()
   627  	defer i.instancesMtx.Unlock()
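        	// Re-check under the write lock: another goroutine may have created this instance between the read-locked lookup above and acquiring the write lock.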
   628  	inst, ok = i.instances[instanceID]
   629  	if !ok {
   630  		var err error
   631  		inst, err = newInstance(&i.cfg, i.periodicConfigs, instanceID, i.limiter, i.tenantConfigs, i.wal, i.metrics, i.flushOnShutdownSwitch, i.chunkFilter)
   632  		if err != nil {
   633  			return nil, err
   634  		}
   635  		i.instances[instanceID] = inst
   636  		activeTenantsStats.Set(int64(len(i.instances)))
   637  	}
   638  	return inst, nil
   639  }
   640  
   641  // Query the ingesters for log streams matching a set of matchers.
   642  func (i *Ingester) Query(req *logproto.QueryRequest, queryServer logproto.Querier_QueryServer) error {
   643  	// initialize stats collection for ingester queries.
   644  	_, ctx := stats.NewContext(queryServer.Context())
   645  
   646  	instanceID, err := tenant.TenantID(ctx)
   647  	if err != nil {
   648  		return err
   649  	}
   650  
   651  	instance, err := i.GetOrCreateInstance(instanceID)
   652  	if err != nil {
   653  		return err
   654  	}
   655  	it, err := instance.Query(ctx, logql.SelectLogParams{QueryRequest: req})
   656  	if err != nil {
   657  		return err
   658  	}
   659  
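        	// If the ingester may also query the store (see buildStoreRequest), fetch the overlapping range from the store, truncated to the configured look-back period, and merge it with the in-memory iterator.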
   660  	if start, end, ok := buildStoreRequest(i.cfg, req.Start, req.End, time.Now()); ok {
   661  		storeReq := logql.SelectLogParams{QueryRequest: &logproto.QueryRequest{
   662  			Selector:  req.Selector,
   663  			Direction: req.Direction,
   664  			Start:     start,
   665  			End:       end,
   666  			Limit:     req.Limit,
   667  			Shards:    req.Shards,
   668  			Deletes:   req.Deletes,
   669  		}}
   670  		storeItr, err := i.store.SelectLogs(ctx, storeReq)
   671  		if err != nil {
   672  			errUtil.LogErrorWithContext(ctx, "closing iterator", it.Close)
   673  			return err
   674  		}
   675  		it = iter.NewMergeEntryIterator(ctx, []iter.EntryIterator{it, storeItr}, req.Direction)
   676  	}
   677  
   678  	defer errUtil.LogErrorWithContext(ctx, "closing iterator", it.Close)
   679  
   680  	// sendBatches uses -1 to specify no limit.
   681  	batchLimit := int32(req.Limit)
   682  	if batchLimit == 0 {
   683  		batchLimit = -1
   684  	}
   685  
   686  	return sendBatches(ctx, it, queryServer, batchLimit)
   687  }
   688  
   689  // QuerySample queries the ingesters for samples from logs matching a set of matchers.
   690  func (i *Ingester) QuerySample(req *logproto.SampleQueryRequest, queryServer logproto.Querier_QuerySampleServer) error {
   691  	// initialize stats collection for ingester queries.
   692  	_, ctx := stats.NewContext(queryServer.Context())
   693  
   694  	instanceID, err := tenant.TenantID(ctx)
   695  	if err != nil {
   696  		return err
   697  	}
   698  
   699  	instance, err := i.GetOrCreateInstance(instanceID)
   700  	if err != nil {
   701  		return err
   702  	}
   703  	it, err := instance.QuerySample(ctx, logql.SelectSampleParams{SampleQueryRequest: req})
   704  	if err != nil {
   705  		return err
   706  	}
   707  
   708  	if start, end, ok := buildStoreRequest(i.cfg, req.Start, req.End, time.Now()); ok {
   709  		storeReq := logql.SelectSampleParams{SampleQueryRequest: &logproto.SampleQueryRequest{
   710  			Start:    start,
   711  			End:      end,
   712  			Selector: req.Selector,
   713  			Shards:   req.Shards,
   714  			Deletes:  req.Deletes,
   715  		}}
   716  		storeItr, err := i.store.SelectSamples(ctx, storeReq)
   717  		if err != nil {
   718  			errUtil.LogErrorWithContext(ctx, "closing iterator", it.Close)
   719  			return err
   720  		}
   721  
   722  		it = iter.NewMergeSampleIterator(ctx, []iter.SampleIterator{it, storeItr})
   723  	}
   724  
   725  	defer errUtil.LogErrorWithContext(ctx, "closing iterator", it.Close)
   726  
   727  	return sendSampleBatches(ctx, it, queryServer)
   728  }
   729  
   730  // asyncStoreMaxLookBack returns a max look back period only if the active index type is one of the async index stores, i.e. `boltdb-shipper` or `tsdb`.
   731  // The max look back is limited to the `from` time of the active async store config.
   732  // It also considers the previous periodic config's `from` time if that config uses an async index type as well.
   733  // This limits the lookback to periods where an async store is actually in use.
   734  func (i *Ingester) asyncStoreMaxLookBack() time.Duration {
   735  	activePeriodicConfigIndex := config.ActivePeriodConfig(i.periodicConfigs)
   736  	activePeriodicConfig := i.periodicConfigs[activePeriodicConfigIndex]
   737  	if activePeriodicConfig.IndexType != config.BoltDBShipperType && activePeriodicConfig.IndexType != config.TSDBType {
   738  		return 0
   739  	}
   740  
   741  	startTime := activePeriodicConfig.From
   742  	if activePeriodicConfigIndex != 0 && (i.periodicConfigs[activePeriodicConfigIndex-1].IndexType == config.BoltDBShipperType ||
   743  		i.periodicConfigs[activePeriodicConfigIndex-1].IndexType == config.TSDBType) {
   744  		startTime = i.periodicConfigs[activePeriodicConfigIndex-1].From
   745  	}
   746  
   747  	maxLookBack := time.Since(startTime.Time.Time())
   748  	return maxLookBack
   749  }
   750  
   751  // GetChunkIDs is meant to be used only when using an async store like boltdb-shipper or tsdb.
   752  func (i *Ingester) GetChunkIDs(ctx context.Context, req *logproto.GetChunkIDsRequest) (*logproto.GetChunkIDsResponse, error) {
   753  	orgID, err := tenant.TenantID(ctx)
   754  	if err != nil {
   755  		return nil, err
   756  	}
   757  
   758  	asyncStoreMaxLookBack := i.asyncStoreMaxLookBack()
   759  	if asyncStoreMaxLookBack == 0 {
   760  		return &logproto.GetChunkIDsResponse{}, nil
   761  	}
   762  
   763  	reqStart := req.Start
   764  	reqStart = adjustQueryStartTime(asyncStoreMaxLookBack, reqStart, time.Now())
   765  
   766  	// parse the request
   767  	start, end := errUtil.RoundToMilliseconds(reqStart, req.End)
   768  	matchers, err := syntax.ParseMatchers(req.Matchers)
   769  	if err != nil {
   770  		return nil, err
   771  	}
   772  
   773  	// get chunk references
   774  	chunksGroups, _, err := i.store.GetChunkRefs(ctx, orgID, start, end, matchers...)
   775  	if err != nil {
   776  		return nil, err
   777  	}
   778  
   779  	// todo (Callum) ingester should maybe store the whole schema config?
   780  	s := config.SchemaConfig{
   781  		Configs: i.periodicConfigs,
   782  	}
   783  
   784  	// build the response
   785  	resp := logproto.GetChunkIDsResponse{ChunkIDs: []string{}}
   786  	for _, chunks := range chunksGroups {
   787  		for _, chk := range chunks {
   788  			resp.ChunkIDs = append(resp.ChunkIDs, s.ExternalKey(chk.ChunkRef))
   789  		}
   790  	}
   791  
   792  	return &resp, nil
   793  }
   794  
   795  // Label returns the set of labels for the streams this ingester knows about.
   796  func (i *Ingester) Label(ctx context.Context, req *logproto.LabelRequest) (*logproto.LabelResponse, error) {
   797  	userID, err := tenant.TenantID(ctx)
   798  	if err != nil {
   799  		return nil, err
   800  	}
   801  
   802  	instance, err := i.GetOrCreateInstance(userID)
   803  	if err != nil {
   804  		return nil, err
   805  	}
   806  	resp, err := instance.Label(ctx, req)
   807  	if err != nil {
   808  		return nil, err
   809  	}
   810  
   811  	if req.Start == nil {
   812  		return resp, nil
   813  	}
   814  
   815  	// Only continue if the active index type is one of async index store types or QueryStore flag is true.
   816  	asyncStoreMaxLookBack := i.asyncStoreMaxLookBack()
   817  	if asyncStoreMaxLookBack == 0 && !i.cfg.QueryStore {
   818  		return resp, nil
   819  	}
   820  
   821  	var cs storage.Store
   822  	var ok bool
   823  	if cs, ok = i.store.(storage.Store); !ok {
   824  		return resp, nil
   825  	}
   826  
   827  	maxLookBackPeriod := i.cfg.QueryStoreMaxLookBackPeriod
   828  	if asyncStoreMaxLookBack != 0 {
   829  		maxLookBackPeriod = asyncStoreMaxLookBack
   830  	}
   831  	// Adjust the start time based on QueryStoreMaxLookBackPeriod.
   832  	start := adjustQueryStartTime(maxLookBackPeriod, *req.Start, time.Now())
   833  	if start.After(*req.End) {
   834  		// The request is older than we are allowed to query the store, just return what we have.
   835  		return resp, nil
   836  	}
   837  	from, through := model.TimeFromUnixNano(start.UnixNano()), model.TimeFromUnixNano(req.End.UnixNano())
   838  	var storeValues []string
   839  	if req.Values {
   840  		storeValues, err = cs.LabelValuesForMetricName(ctx, userID, from, through, "logs", req.Name)
   841  		if err != nil {
   842  			return nil, err
   843  		}
   844  	} else {
   845  		storeValues, err = cs.LabelNamesForMetricName(ctx, userID, from, through, "logs")
   846  		if err != nil {
   847  			return nil, err
   848  		}
   849  	}
   850  
   851  	return &logproto.LabelResponse{
   852  		Values: errUtil.MergeStringLists(resp.Values, storeValues),
   853  	}, nil
   854  }
   855  
   856  // Series queries the ingester for log stream identifiers (label sets) matching a set of matchers
   857  func (i *Ingester) Series(ctx context.Context, req *logproto.SeriesRequest) (*logproto.SeriesResponse, error) {
   858  	instanceID, err := tenant.TenantID(ctx)
   859  	if err != nil {
   860  		return nil, err
   861  	}
   862  
   863  	instance, err := i.GetOrCreateInstance(instanceID)
   864  	if err != nil {
   865  		return nil, err
   866  	}
   867  	return instance.Series(ctx, req)
   868  }
   869  
   870  func (i *Ingester) GetStats(ctx context.Context, req *logproto.IndexStatsRequest) (*logproto.IndexStatsResponse, error) {
   871  	user, err := tenant.TenantID(ctx)
   872  	if err != nil {
   873  		return nil, err
   874  	}
   875  
   876  	instance, err := i.GetOrCreateInstance(user)
   877  	if err != nil {
   878  		return nil, err
   879  	}
   880  
   881  	matchers, err := syntax.ParseMatchers(req.Matchers)
   882  	if err != nil {
   883  		return nil, err
   884  	}
   885  
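        	// Query the in-memory instance and the store for stats concurrently, then merge the two responses.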
   886  	type f func() (*logproto.IndexStatsResponse, error)
   887  	jobs := []f{
   888  		f(func() (*logproto.IndexStatsResponse, error) {
   889  			return instance.GetStats(ctx, req)
   890  		}),
   891  		f(func() (*logproto.IndexStatsResponse, error) {
   892  			return i.store.Stats(ctx, user, req.From, req.Through, matchers...)
   893  		}),
   894  	}
   895  	resps := make([]*logproto.IndexStatsResponse, len(jobs))
   896  
   897  	if err := concurrency.ForEachJob(
   898  		ctx,
   899  		len(jobs),
   900  		2,
   901  		func(ctx context.Context, idx int) error {
   902  			res, err := jobs[idx]()
   903  			resps[idx] = res
   904  			return err
   905  		},
   906  	); err != nil {
   907  		return nil, err
   908  	}
   909  
   910  	merged := index_stats.MergeStats(resps...)
   911  	return &merged, nil
   912  }
   913  
   914  // Watch implements grpc_health_v1.HealthCheck.
   915  func (*Ingester) Watch(*grpc_health_v1.HealthCheckRequest, grpc_health_v1.Health_WatchServer) error {
   916  	return nil
   917  }
   918  
   919  // CheckReady is used to indicate to k8s when the ingesters are ready for
   920  // the addition or removal of another ingester. It returns nil when the ingester
   921  // is ready (the readiness handler then responds with 204, and 500 otherwise).
   922  func (i *Ingester) CheckReady(ctx context.Context) error {
   923  	if s := i.State(); s != services.Running && s != services.Stopping {
   924  		return fmt.Errorf("ingester not ready: %v", s)
   925  	}
   926  	return i.lifecycler.CheckReady(ctx)
   927  }
   928  
   929  func (i *Ingester) getInstanceByID(id string) (*instance, bool) {
   930  	i.instancesMtx.RLock()
   931  	defer i.instancesMtx.RUnlock()
   932  
   933  	inst, ok := i.instances[id]
   934  	return inst, ok
   935  }
   936  
   937  func (i *Ingester) getInstances() []*instance {
   938  	i.instancesMtx.RLock()
   939  	defer i.instancesMtx.RUnlock()
   940  
   941  	instances := make([]*instance, 0, len(i.instances))
   942  	for _, instance := range i.instances {
   943  		instances = append(instances, instance)
   944  	}
   945  	return instances
   946  }
   947  
   948  // Tail streams logs matching the given query.
   949  func (i *Ingester) Tail(req *logproto.TailRequest, queryServer logproto.Querier_TailServer) error {
   950  	select {
   951  	case <-i.tailersQuit:
   952  		return errors.New("Ingester is stopping")
   953  	default:
   954  	}
   955  
   956  	instanceID, err := tenant.TenantID(queryServer.Context())
   957  	if err != nil {
   958  		return err
   959  	}
   960  
   961  	instance, err := i.GetOrCreateInstance(instanceID)
   962  	if err != nil {
   963  		return err
   964  	}
   965  	tailer, err := newTailer(instanceID, req.Query, queryServer, i.cfg.MaxDroppedStreams)
   966  	if err != nil {
   967  		return err
   968  	}
   969  
   970  	if err := instance.addNewTailer(queryServer.Context(), tailer); err != nil {
   971  		return err
   972  	}
   973  	tailer.loop()
   974  	return nil
   975  }
   976  
   977  // TailersCount returns count of active tail requests from a user
   978  func (i *Ingester) TailersCount(ctx context.Context, in *logproto.TailersCountRequest) (*logproto.TailersCountResponse, error) {
   979  	instanceID, err := tenant.TenantID(ctx)
   980  	if err != nil {
   981  		return nil, err
   982  	}
   983  
   984  	resp := logproto.TailersCountResponse{}
   985  
   986  	instance, ok := i.getInstanceByID(instanceID)
   987  	if ok {
   988  		resp.Count = instance.openTailersCount()
   989  	}
   990  
   991  	return &resp, nil
   992  }
   993  
   994  // buildStoreRequest returns a store request built from an ingester request; it returns false if QueryStore is disabled in the configuration.
   995  // The request may be truncated due to QueryStoreMaxLookBackPeriod, which limits the range of the request to make sure
   996  // we query only enough to not miss any data without adding too many duplicates by covering the whole time range in the query.
   997  func buildStoreRequest(cfg Config, start, end, now time.Time) (time.Time, time.Time, bool) {
   998  	if !cfg.QueryStore {
   999  		return time.Time{}, time.Time{}, false
  1000  	}
  1001  	start = adjustQueryStartTime(cfg.QueryStoreMaxLookBackPeriod, start, now)
  1002  
  1003  	if start.After(end) {
  1004  		return time.Time{}, time.Time{}, false
  1005  	}
  1006  	return start, end, true
  1007  }
  1008  
  1009  func adjustQueryStartTime(maxLookBackPeriod time.Duration, start, now time.Time) time.Time {
  1010  	if maxLookBackPeriod > 0 {
  1011  		oldestStartTime := now.Add(-maxLookBackPeriod)
  1012  		if oldestStartTime.After(start) {
  1013  			return oldestStartTime
  1014  		}
  1015  	}
  1016  	return start
  1017  }
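
        // Worked example (illustrative): with QueryStoreMaxLookBackPeriod = 1h and now
        // at 12:00, a request starting at 10:30 is adjusted to start at 11:00, so
        // buildStoreRequest truncates the store query to [11:00, end]. A request
        // starting at 11:30 already falls within the look-back window and is left as-is.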