github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/flush.go

package ingester

import (
	"bytes"
	"fmt"
	"net/http"
	"sync"
	"time"

	"github.com/go-kit/log/level"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/weaveworks/common/user"
	"golang.org/x/net/context"

	"github.com/grafana/dskit/tenant"

	"github.com/grafana/loki/pkg/chunkenc"
	"github.com/grafana/loki/pkg/storage/chunk"
	"github.com/grafana/loki/pkg/util"
	loki_util "github.com/grafana/loki/pkg/util"
	util_log "github.com/grafana/loki/pkg/util/log"
)

const (
	// Backoff for retrying 'immediate' flushes. Only counts for queue
	// position, not wallclock time.
	flushBackoff = 1 * time.Second

	nameLabel = "__name__"
	logsValue = "logs"

	flushReasonIdle   = "idle"
	flushReasonMaxAge = "max_age"
	flushReasonForced = "forced"
	flushReasonFull   = "full"
	flushReasonSynced = "synced"
)
// Note: this is called both during WAL replay (zero or more times)
// and again after replay completes.
func (i *Ingester) InitFlushQueues() {
	i.flushQueuesDone.Add(i.cfg.ConcurrentFlushes)
	for j := 0; j < i.cfg.ConcurrentFlushes; j++ {
		i.flushQueues[j] = util.NewPriorityQueue(flushQueueLength)
		go i.flushLoop(j)
	}
}

// Flush triggers a flush of all the chunks and closes the flush queues.
// Called from the Lifecycler as part of the ingester shutdown.
func (i *Ingester) Flush() {
	i.flush(true)
}

func (i *Ingester) flush(mayRemoveStreams bool) {
	i.sweepUsers(true, mayRemoveStreams)

	// Close the flush queues, to unblock waiting workers.
	for _, flushQueue := range i.flushQueues {
		flushQueue.Close()
	}

	i.flushQueuesDone.Wait()
	level.Debug(util_log.Logger).Log("msg", "flush queues have drained")
}

// FlushHandler triggers a flush of all in-memory chunks. Mainly used for
// local testing.
func (i *Ingester) FlushHandler(w http.ResponseWriter, _ *http.Request) {
	i.sweepUsers(true, true)
	w.WriteHeader(http.StatusNoContent)
}

type flushOp struct {
	from      model.Time
	userID    string
	fp        model.Fingerprint
	immediate bool
}

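// Key de-duplicates ops in the flush queue, so repeated sweeps of the same
// stream collapse into a single pending flush rather than piling up.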
func (o *flushOp) Key() string {
	return fmt.Sprintf("%s-%s-%v", o.userID, o.fp, o.immediate)
}

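// Priority negates the op's start time so that the op covering the oldest data
// is dequeued first (the priority queue pops the highest priority).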
func (o *flushOp) Priority() int64 {
	return -int64(o.from)
}

// sweepUsers periodically schedules series for flushing and garbage collects users with no series
func (i *Ingester) sweepUsers(immediate, mayRemoveStreams bool) {
	instances := i.getInstances()

	for _, instance := range instances {
		i.sweepInstance(instance, immediate, mayRemoveStreams)
	}
}

func (i *Ingester) sweepInstance(instance *instance, immediate, mayRemoveStreams bool) {
	_ = instance.streams.ForEach(func(s *stream) (bool, error) {
		i.sweepStream(instance, s, immediate)
		i.removeFlushedChunks(instance, s, mayRemoveStreams)
		return true, nil
	})
}

func (i *Ingester) sweepStream(instance *instance, stream *stream, immediate bool) {
	stream.chunkMtx.RLock()
	defer stream.chunkMtx.RUnlock()
	if len(stream.chunks) == 0 {
		return
	}

	lastChunk := stream.chunks[len(stream.chunks)-1]
	shouldFlush, _ := i.shouldFlushChunk(&lastChunk)
	if len(stream.chunks) == 1 && !immediate && !shouldFlush {
		return
	}

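	// Shard ops across the flush queues by stream fingerprint so that all
	// flushes for a given stream are handled by the same worker.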
	flushQueueIndex := int(uint64(stream.fp) % uint64(i.cfg.ConcurrentFlushes))
	firstTime, _ := stream.chunks[0].chunk.Bounds()
	i.flushQueues[flushQueueIndex].Enqueue(&flushOp{
		model.TimeFromUnixNano(firstTime.UnixNano()), instance.instanceID,
		stream.fp, immediate,
	})
}

func (i *Ingester) flushLoop(j int) {
	defer func() {
		level.Debug(util_log.Logger).Log("msg", "Ingester.flushLoop() exited")
		i.flushQueuesDone.Done()
	}()

	for {
		o := i.flushQueues[j].Dequeue()
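		// Dequeue blocks until an op is available; it returns nil once the
		// queue has been closed and drained, which shuts this worker down.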
		if o == nil {
			return
		}
		op := o.(*flushOp)

		err := i.flushUserSeries(op.userID, op.fp, op.immediate)
		if err != nil {
			level.Error(util_log.WithUserID(op.userID, util_log.Logger)).Log("msg", "failed to flush", "err", err)
		}

		// If we're exiting & we failed to flush, put the failed operation
		// back in the queue at a later point.
		if op.immediate && err != nil {
			op.from = op.from.Add(flushBackoff)
			i.flushQueues[j].Enqueue(op)
		}
	}
}

func (i *Ingester) flushUserSeries(userID string, fp model.Fingerprint, immediate bool) error {
	instance, ok := i.getInstanceByID(userID)
	if !ok {
		return nil
	}

	chunks, labels, chunkMtx := i.collectChunksToFlush(instance, fp, immediate)
	if len(chunks) < 1 {
		return nil
	}

	lbs := labels.String()
	level.Info(util_log.Logger).Log("msg", "flushing stream", "user", userID, "fp", fp, "immediate", immediate, "num_chunks", len(chunks), "labels", lbs)

	ctx := user.InjectOrgID(context.Background(), userID)
	ctx, cancel := context.WithTimeout(ctx, i.cfg.FlushOpTimeout)
	defer cancel()
	err := i.flushChunks(ctx, fp, labels, chunks, chunkMtx)
	if err != nil {
		return fmt.Errorf("failed to flush chunks: %w, num_chunks: %d, labels: %s", err, len(chunks), lbs)
	}

	return nil
}

func (i *Ingester) collectChunksToFlush(instance *instance, fp model.Fingerprint, immediate bool) ([]*chunkDesc, labels.Labels, *sync.RWMutex) {
	var stream *stream
	var ok bool
	stream, ok = instance.streams.LoadByFP(fp)

	if !ok {
		return nil, nil, nil
	}

	stream.chunkMtx.Lock()
	defer stream.chunkMtx.Unlock()

	var result []*chunkDesc
	for j := range stream.chunks {
		shouldFlush, reason := i.shouldFlushChunk(&stream.chunks[j])
		if immediate || shouldFlush {
			// Ensure no more writes happen to this chunk.
			if !stream.chunks[j].closed {
				stream.chunks[j].closed = true
			}
			// Flush this chunk if it hasn't already been successfully flushed.
			if stream.chunks[j].flushed.IsZero() {
				if immediate {
					reason = flushReasonForced
				}
				stream.chunks[j].reason = reason

				result = append(result, &stream.chunks[j])
			}
		}
	}
	return result, stream.labels, &stream.chunkMtx
}

func (i *Ingester) shouldFlushChunk(chunk *chunkDesc) (bool, string) {
	// Append should close the chunk when a new one is added.
	if chunk.closed {
		if chunk.synced {
			return true, flushReasonSynced
		}
		return true, flushReasonFull
	}

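	// An open chunk whose stream has received no writes for MaxChunkIdle is
	// flushed early so data for low-volume streams doesn't sit in memory
	// indefinitely.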
	if time.Since(chunk.lastUpdated) > i.cfg.MaxChunkIdle {
		return true, flushReasonIdle
	}

	if from, to := chunk.chunk.Bounds(); to.Sub(from) > i.cfg.MaxChunkAge {
		return true, flushReasonMaxAge
	}

	return false, ""
}

func (i *Ingester) removeFlushedChunks(instance *instance, stream *stream, mayRemoveStream bool) {
	now := time.Now()

	stream.chunkMtx.Lock()
	defer stream.chunkMtx.Unlock()
	prevNumChunks := len(stream.chunks)
	var subtracted int
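	// Flushed chunks are kept around for RetainPeriod so recently flushed data
	// can still be served from memory; only drop them once that period has passed.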
	for len(stream.chunks) > 0 {
		if stream.chunks[0].flushed.IsZero() || now.Sub(stream.chunks[0].flushed) < i.cfg.RetainPeriod {
			break
		}

		subtracted += stream.chunks[0].chunk.UncompressedSize()
		stream.chunks[0].chunk = nil // erase reference so the chunk can be garbage-collected
		stream.chunks = stream.chunks[1:]
	}
	i.metrics.memoryChunks.Sub(float64(prevNumChunks - len(stream.chunks)))

	// Signal how much data has been flushed to lessen any WAL replay pressure.
	i.replayController.Sub(int64(subtracted))

	if mayRemoveStream && len(stream.chunks) == 0 {
		// Unlock first, then lock inside streams' lock to prevent deadlock
		stream.chunkMtx.Unlock()
		// Only lock streamsMap when it's needed to remove a stream
		instance.streams.WithLock(func() {
			stream.chunkMtx.Lock()
			// Double check length
			if len(stream.chunks) == 0 {
				instance.removeStream(stream)
			}
		})
	}
}

// flushChunks iterates over the given chunkDescs, derives a chunk.Chunk from each and flushes them to the store, one at a time.
//
// If a chunk fails to flush, the whole operation is reinserted in the queue. Since previously flushed chunks
// are marked as flushed, they shouldn't be flushed again.
// It has to close the given chunks so that the head block is included.
func (i *Ingester) flushChunks(ctx context.Context, fp model.Fingerprint, labelPairs labels.Labels, cs []*chunkDesc, chunkMtx sync.Locker) error {
	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return err
	}

	// NB(owen-d): No longer needed in TSDB (and is removed in that code path)
	// It's required by historical index stores so we keep it for now.
	labelsBuilder := labels.NewBuilder(labelPairs)
	labelsBuilder.Set(nameLabel, logsValue)
	metric := labelsBuilder.Labels()

	sizePerTenant := i.metrics.chunkSizePerTenant.WithLabelValues(userID)
	countPerTenant := i.metrics.chunksPerTenant.WithLabelValues(userID)

	for j, c := range cs {
		if err := i.closeChunk(c, chunkMtx); err != nil {
			return fmt.Errorf("chunk close for flushing: %w", err)
		}

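		// Chunk bounds are nanosecond timestamps; round them to the millisecond
		// resolution of model.Time used by the store.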
		firstTime, lastTime := loki_util.RoundToMilliseconds(c.chunk.Bounds())
		ch := chunk.NewChunk(
			userID, fp, metric,
			chunkenc.NewFacade(c.chunk, i.cfg.BlockSize, i.cfg.TargetChunkSize),
			firstTime,
			lastTime,
		)

		// encodeChunk mutates the chunk so we must pass by reference
		if err := i.encodeChunk(ctx, &ch, c); err != nil {
			return err
		}

		if err := i.flushChunk(ctx, &ch); err != nil {
			return err
		}

		i.markChunkAsFlushed(cs[j], chunkMtx)

		reason := func() string {
			chunkMtx.Lock()
			defer chunkMtx.Unlock()

			return c.reason
		}()

		i.reportFlushedChunkStatistics(&ch, c, sizePerTenant, countPerTenant, reason)
	}

	return nil
}

// markChunkAsFlushed marks a chunk as flushed so it won't be flushed again if this operation fails and is requeued.
func (i *Ingester) markChunkAsFlushed(desc *chunkDesc, chunkMtx sync.Locker) {
	chunkMtx.Lock()
	defer chunkMtx.Unlock()
	desc.flushed = time.Now()
}

// closeChunk closes the given chunk while locking it to ensure that new blocks are cut before flushing.
//
// If the chunk isn't closed, data in the head block isn't included.
func (i *Ingester) closeChunk(desc *chunkDesc, chunkMtx sync.Locker) error {
	chunkMtx.Lock()
	defer chunkMtx.Unlock()

	return desc.chunk.Close()
}

// encodeChunk encodes a chunk.Chunk based on the given chunkDesc.
//
// If the encoding is unsuccessful, the flush operation is reinserted in the queue, which will cause
// the encoding for the given chunk to be evaluated again.
func (i *Ingester) encodeChunk(ctx context.Context, ch *chunk.Chunk, desc *chunkDesc) error {
	if err := ctx.Err(); err != nil {
		return err
	}
	start := time.Now()
	chunkBytesSize := desc.chunk.BytesSize() + 4*1024 // size + 4kB should be enough room for cortex header
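	// EncodeTo serializes the chunk into the supplied buffer and retains the
	// encoded bytes on ch, so the later ch.Encoded() call in
	// reportFlushedChunkStatistics should not need to re-encode.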
	if err := ch.EncodeTo(bytes.NewBuffer(make([]byte, 0, chunkBytesSize))); err != nil {
		return fmt.Errorf("chunk encoding: %w", err)
	}
	i.metrics.chunkEncodeTime.Observe(time.Since(start).Seconds())
	return nil
}

// flushChunk flushes the given chunk to the store.
//
// If the flush is successful, metrics for this flush are reported.
// If the flush isn't successful, the operation for this userID is requeued, allowing this and all other unflushed
// chunks to have another opportunity to be flushed.
func (i *Ingester) flushChunk(ctx context.Context, ch *chunk.Chunk) error {
	if err := i.store.Put(ctx, []chunk.Chunk{*ch}); err != nil {
		return fmt.Errorf("store put chunk: %w", err)
	}
	i.metrics.flushedChunksStats.Inc(1)
	return nil
}

// reportFlushedChunkStatistics calculates overall statistics of flushed chunks without compromising the flush process.
func (i *Ingester) reportFlushedChunkStatistics(ch *chunk.Chunk, desc *chunkDesc, sizePerTenant prometheus.Counter, countPerTenant prometheus.Counter, reason string) {
	byt, err := ch.Encoded()
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "failed to encode flushed wire chunk", "err", err)
		return
	}

	i.metrics.chunksFlushedPerReason.WithLabelValues(reason).Add(1)

	compressedSize := float64(len(byt))
	uncompressedSize, ok := chunkenc.UncompressedSize(ch.Data)

	if ok && compressedSize > 0 {
		i.metrics.chunkCompressionRatio.Observe(float64(uncompressedSize) / compressedSize)
	}

	utilization := ch.Data.Utilization()
	i.metrics.chunkUtilization.Observe(utilization)
	numEntries := desc.chunk.Size()
	i.metrics.chunkEntries.Observe(float64(numEntries))
	i.metrics.chunkSize.Observe(compressedSize)
	sizePerTenant.Add(compressedSize)
	countPerTenant.Inc()

	boundsFrom, boundsTo := desc.chunk.Bounds()
	i.metrics.chunkAge.Observe(time.Since(boundsFrom).Seconds())
	i.metrics.chunkLifespan.Observe(boundsTo.Sub(boundsFrom).Hours())

	i.metrics.flushedChunksBytesStats.Record(compressedSize)
	i.metrics.flushedChunksLinesStats.Record(float64(numEntries))
	i.metrics.flushedChunksUtilizationStats.Record(utilization)
	i.metrics.flushedChunksAgeStats.Record(time.Since(boundsFrom).Seconds())
	i.metrics.flushedChunksLifespanStats.Record(boundsTo.Sub(boundsFrom).Hours())
}