github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/compactor.go

package tsdb

import (
	"context"
	"fmt"
	"math"
	"os"
	"sync"
	"time"
	"unsafe"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/concurrency"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"

	"github.com/grafana/loki/pkg/logproto"
	"github.com/grafana/loki/pkg/storage/chunk"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/retention"
	index_shipper "github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
	"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
)

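// readDBsConcurrency limits how many source index files are downloaded and opened in parallel.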
const readDBsConcurrency = 50

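// indexProcessor implements compactor.IndexCompactor for TSDB index files.
//
// A minimal usage sketch (the index sets, factory func and period config are assumed to come
// from the caller's compactor setup):
//
//	c := NewIndexCompactor()
//	tc := c.NewTableCompactor(ctx, commonIndexSet, existingUserIndexSet, makeEmptyUserIndexSet, periodConfig)
//	if err := tc.CompactTable(); err != nil {
//		// handle compaction failure
//	}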
type indexProcessor struct{}

// NewIndexCompactor returns an IndexCompactor for compacting TSDB index files.
func NewIndexCompactor() compactor.IndexCompactor {
	return indexProcessor{}
}

// NewTableCompactor returns a compactor for the indexes of a single table.
func (i indexProcessor) NewTableCompactor(ctx context.Context, commonIndexSet compactor.IndexSet, existingUserIndexSet map[string]compactor.IndexSet, userIndexSetFactoryFunc compactor.MakeEmptyUserIndexSetFunc, periodConfig config.PeriodConfig) compactor.TableCompactor {
	return newTableCompactor(ctx, commonIndexSet, existingUserIndexSet, userIndexSetFactoryFunc, periodConfig)
}

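// OpenCompactedIndexFile opens a compacted TSDB index file at the given path and loads all of
// its series into a fresh Builder, returning a CompactedIndex that further modifications
// (e.g. retention and chunk deletes) can be applied to.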
func (i indexProcessor) OpenCompactedIndexFile(ctx context.Context, path, tableName, userID, workingDir string, periodConfig config.PeriodConfig, logger log.Logger) (compactor.CompactedIndex, error) {
	indexFile, err := OpenShippableTSDB(path)
	if err != nil {
		return nil, err
	}

	defer func() {
		if err := indexFile.Close(); err != nil {
			level.Error(logger).Log("msg", "failed to close index file", "err", err)
		}
	}()

	builder := NewBuilder()
	// an empty-name equality matcher matches all series
	err = indexFile.(*TSDBFile).Index.(*TSDBIndex).forSeries(ctx, nil, func(lbls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
		builder.AddSeries(lbls.Copy(), fp, chks)
	}, labels.MustNewMatcher(labels.MatchEqual, "", ""))
	if err != nil {
		return nil, err
	}

	builder.chunksFinalized = true

	return newCompactedIndex(ctx, tableName, userID, workingDir, periodConfig, builder), nil
}

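// tableCompactor compacts the indexes of a single table, splitting multi-tenant indexes into
// per-user indexes and merging them with each user's existing compacted index.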
type tableCompactor struct {
	commonIndexSet          compactor.IndexSet
	existingUserIndexSet    map[string]compactor.IndexSet
	userIndexSetFactoryFunc compactor.MakeEmptyUserIndexSetFunc
	ctx                     context.Context
	periodConfig            config.PeriodConfig
	compactedIndexes        map[string]compactor.CompactedIndex
}

func newTableCompactor(
	ctx context.Context,
	commonIndexSet compactor.IndexSet,
	existingUserIndexSet map[string]compactor.IndexSet,
	userIndexSetFactoryFunc compactor.MakeEmptyUserIndexSetFunc,
	periodConfig config.PeriodConfig,
) *tableCompactor {
	return &tableCompactor{
		ctx:                     ctx,
		commonIndexSet:          commonIndexSet,
		existingUserIndexSet:    existingUserIndexSet,
		userIndexSetFactoryFunc: userIndexSetFactoryFunc,
		periodConfig:            periodConfig,
	}
}

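// CompactTable compacts all the indexes of the table:
//  1. concurrently download and open the multi-tenant index files,
//  2. for every user found in them, merge that user's series with the user's existing
//     compacted index(es) into a single per-user index,
//  3. for users untouched by step 2 that still have multiple index files in storage,
//     merge those files into one.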
func (t *tableCompactor) CompactTable() error {
	multiTenantIndexes := t.commonIndexSet.ListSourceFiles()

	var multiTenantIndices []Index
	indicesMtx := sync.Mutex{}

	// concurrently download and open all the multi-tenant indexes
	err := concurrency.ForEachJob(t.ctx, len(multiTenantIndexes), readDBsConcurrency, func(ctx context.Context, job int) error {
		downloadedAt, err := t.commonIndexSet.GetSourceFile(multiTenantIndexes[job])
		if err != nil {
			return err
		}

		defer func() {
			if err := os.Remove(downloadedAt); err != nil {
				level.Error(t.commonIndexSet.GetLogger()).Log("msg", "failed to remove downloaded index file", "path", downloadedAt, "err", err)
			}
		}()

		idx, err := OpenShippableTSDB(downloadedAt)
		if err != nil {
			return err
		}

		indicesMtx.Lock()
		defer indicesMtx.Unlock()
		multiTenantIndices = append(multiTenantIndices, idx.(Index))

		return nil
	})
	if err != nil {
		return err
	}

	var multiTenantIndex Index = NoopIndex{}
	if len(multiTenantIndices) > 0 {
		var err error
		multiTenantIndex, err = NewMultiIndex(multiTenantIndices...)
		if err != nil {
			return err
		}
	}

	// find all the user IDs in the multi-tenant indexes using TenantLabel
	userIDs, err := multiTenantIndex.LabelValues(t.ctx, "", 0, math.MaxInt64, TenantLabel)
	if err != nil {
		return err
	}

	// go through all the users that have an index in the multi-tenant indexes and set up a builder for each.
	// The builder combines the user's index from the multi-tenant indexes with their existing compacted index(es).
	t.compactedIndexes = make(map[string]compactor.CompactedIndex, len(userIDs))
	for _, userID := range userIDs {
		existingUserIndexSet, ok := t.existingUserIndexSet[userID]
		if !ok {
			var err error
			existingUserIndexSet, err = t.userIndexSetFactoryFunc(userID)
			if err != nil {
				return err
			}
		}

		builder, err := setupBuilder(t.ctx, userID, existingUserIndexSet, multiTenantIndices)
		if err != nil {
			return err
		}

		compactedIndex := newCompactedIndex(t.ctx, existingUserIndexSet.GetTableName(), userID, existingUserIndexSet.GetWorkingDir(), t.periodConfig, builder)
		t.compactedIndexes[userID] = compactedIndex

		if err := existingUserIndexSet.SetCompactedIndex(compactedIndex, true); err != nil {
			return err
		}
	}

	// go through existingUserIndexSet and find the users that were not processed above because they
	// had no updates, but still have multiple index files in storage; merge those into a single index file.
	for userID, srcIdxSet := range t.existingUserIndexSet {
		if _, ok := t.compactedIndexes[userID]; ok || len(srcIdxSet.ListSourceFiles()) <= 1 {
			continue
		}

		builder, err := setupBuilder(t.ctx, userID, srcIdxSet, []Index{})
		if err != nil {
			return err
		}

		compactedIndex := newCompactedIndex(t.ctx, srcIdxSet.GetTableName(), userID, srcIdxSet.GetWorkingDir(), t.periodConfig, builder)
		t.compactedIndexes[userID] = compactedIndex
		if err := srcIdxSet.SetCompactedIndex(compactedIndex, true); err != nil {
			return err
		}
	}

	if len(multiTenantIndices) > 0 {
		if err := t.commonIndexSet.SetCompactedIndex(nil, true); err != nil {
			return err
		}
	}
	return nil
}

// setupBuilder creates a Builder for a single user.
// It combines the user's index from the multiTenantIndexes with their existing compacted index(es).
func setupBuilder(ctx context.Context, userID string, sourceIndexSet compactor.IndexSet, multiTenantIndexes []Index) (*Builder, error) {
	sourceIndexes := sourceIndexSet.ListSourceFiles()
	builder := NewBuilder()

	// add the user's index from the multi-tenant indexes to the builder,
	// dropping the tenant label since the target index is single-tenant
	for _, idx := range multiTenantIndexes {
		err := idx.(*TSDBFile).Index.(*TSDBIndex).forSeries(ctx, nil, func(lbls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
			builder.AddSeries(withoutTenantLabel(lbls.Copy()), fp, chks)
		}, withTenantLabelMatcher(userID, []*labels.Matcher{})...)
		if err != nil {
			return nil, err
		}
	}

	// download all the existing compacted indexes and add them to the builder
	for _, sourceIndex := range sourceIndexes {
		path, err := sourceIndexSet.GetSourceFile(sourceIndex)
		if err != nil {
			return nil, err
		}

		// note: these defers run when setupBuilder returns, not per loop iteration
		defer func() {
			if err := os.Remove(path); err != nil {
				level.Error(sourceIndexSet.GetLogger()).Log("msg", "error removing source index file", "err", err)
			}
		}()

		indexFile, err := OpenShippableTSDB(path)
		if err != nil {
			return nil, err
		}

		defer func() {
			if err := indexFile.Close(); err != nil {
				level.Error(sourceIndexSet.GetLogger()).Log("msg", "failed to close index file", "err", err)
			}
		}()

		err = indexFile.(*TSDBFile).Index.(*TSDBIndex).forSeries(ctx, nil, func(lbls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
			builder.AddSeries(lbls.Copy(), fp, chks)
		}, labels.MustNewMatcher(labels.MatchEqual, "", ""))
		if err != nil {
			return nil, err
		}
	}

	// finalize the chunks to deduplicate and sort them
	builder.FinalizeChunks()

	return builder, nil
}

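// compactedIndex implements compactor.CompactedIndex. It holds the merged series in a Builder
// and queues up chunk deletions (deleteChunks) and additions (indexChunks), which are applied
// when the final index file is built in ToIndexFile.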
type compactedIndex struct {
	ctx           context.Context
	userID        string
	builder       *Builder
	workingDir    string
	tableInterval model.Interval
	periodConfig  config.PeriodConfig

	indexChunks     []chunk.Chunk
	deleteChunks    map[string][]index.ChunkMeta
	seriesToCleanup map[string]struct{}
}

func newCompactedIndex(ctx context.Context, tableName, userID, workingDir string, periodConfig config.PeriodConfig, builder *Builder) *compactedIndex {
	return &compactedIndex{
		ctx:             ctx,
		userID:          userID,
		builder:         builder,
		workingDir:      workingDir,
		periodConfig:    periodConfig,
		tableInterval:   retention.ExtractIntervalFromTableName(tableName),
		deleteChunks:    map[string][]index.ChunkMeta{},
		seriesToCleanup: map[string]struct{}{},
	}
}

// ForEachChunk iterates over all the chunks in the builder and calls the callback function.
func (c *compactedIndex) ForEachChunk(ctx context.Context, callback retention.ChunkEntryCallback) error {
	schemaCfg := config.SchemaConfig{
		Configs: []config.PeriodConfig{c.periodConfig},
	}

	chunkEntry := retention.ChunkEntry{
		ChunkRef: retention.ChunkRef{
			UserID: getUnsafeBytes(c.userID),
		},
	}
	logprotoChunkRef := logproto.ChunkRef{
		UserID: c.userID,
	}
	for seriesID, stream := range c.builder.streams {
		logprotoChunkRef.Fingerprint = uint64(stream.fp)
		chunkEntry.SeriesID = getUnsafeBytes(seriesID)
		chunkEntry.Labels = withoutTenantLabel(stream.labels)

		for i := 0; i < len(stream.chunks) && ctx.Err() == nil; i++ {
			chk := stream.chunks[i]
			logprotoChunkRef.From = chk.From()
			logprotoChunkRef.Through = chk.Through()
			logprotoChunkRef.Checksum = chk.Checksum

			chunkEntry.ChunkID = getUnsafeBytes(schemaCfg.ExternalKey(logprotoChunkRef))
			chunkEntry.From = logprotoChunkRef.From
			chunkEntry.Through = logprotoChunkRef.Through

			deleteChunk, err := callback(chunkEntry)
			if err != nil {
				return err
			}

			if deleteChunk {
				// queue the chunk for deletion; the deletion is applied while building the index in ToIndexFile
				c.deleteChunks[seriesID] = append(c.deleteChunks[seriesID], chk)
			}
		}
	}

	return ctx.Err()
}

// IndexChunk adds the chunk to the list of chunks to index.
// Before accepting the chunk it checks if it falls within the tableInterval and rejects it if not.
func (c *compactedIndex) IndexChunk(chk chunk.Chunk) (bool, error) {
	if chk.From > c.tableInterval.End || c.tableInterval.Start > chk.Through {
		return false, nil
	}

	c.indexChunks = append(c.indexChunks, chk)

	return true, nil
}

// CleanupSeries removes the series from the builder (including its chunks) and drops any of its
// chunks lined up for deletion.
func (c *compactedIndex) CleanupSeries(_ []byte, lbls labels.Labels) error {
	seriesID := lbls.String()
	if _, ok := c.builder.streams[seriesID]; !ok {
		return fmt.Errorf("series cleanup not allowed on non-existing series %s", seriesID)
	}
	delete(c.builder.streams, seriesID)
	delete(c.deleteChunks, seriesID)
	return nil
}

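// Cleanup is a no-op for the TSDB compacted index.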
func (c *compactedIndex) Cleanup() {}

// ToIndexFile creates an index file from the chunk metas stored in the builder.
// Before building the index, it applies the lined-up updates, i.e. deletion of chunks and addition of new chunks.
func (c *compactedIndex) ToIndexFile() (index_shipper.Index, error) {
	for seriesID, chks := range c.deleteChunks {
		for _, chk := range chks {
			chunkFound, err := c.builder.DropChunk(seriesID, chk)
			if err != nil {
				return nil, err
			}
			if !chunkFound {
				return nil, fmt.Errorf("could not drop non-existent chunk %x from series %s", chk, seriesID)
			}
		}
	}
	c.deleteChunks = nil

	for _, chk := range c.indexChunks {
		err := c.builder.InsertChunk(chk.Metric.String(), index.ChunkMeta{
			Checksum: chk.Checksum,
			MinTime:  int64(chk.From),
			MaxTime:  int64(chk.Through),
			KB:       uint32(chk.Size()) / (1 << 10),
			Entries:  uint32(chk.Data.Entries()),
		})
		if err != nil {
			return nil, err
		}
	}
	c.indexChunks = nil

	id, err := c.builder.Build(c.ctx, c.workingDir, func(from, through model.Time, checksum uint32) Identifier {
		id := SingleTenantTSDBIdentifier{
			TS:       time.Now(),
			From:     from,
			Through:  through,
			Checksum: checksum,
		}
		return newPrefixedIdentifier(id, c.workingDir, "")
	})
	if err != nil {
		return nil, err
	}

	return NewShippableTSDBFile(id, false)
}

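// getUnsafeBytes does a zero-copy conversion from string to []byte by reinterpreting the string
// header as a slice header. The returned slice must never be mutated, and its capacity is not
// meaningful; it is only safe for reads within len(s).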
func getUnsafeBytes(s string) []byte {
	return *((*[]byte)(unsafe.Pointer(&s)))
}