github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/compactor/table.go (about)

     1  package compactor
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"sync"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/go-kit/log/level"
    12  	"github.com/grafana/dskit/concurrency"
    13  	"github.com/prometheus/common/model"
    14  
    15  	chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util"
    16  	"github.com/grafana/loki/pkg/storage/config"
    17  	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/retention"
    18  	"github.com/grafana/loki/pkg/storage/stores/indexshipper/storage"
    19  	util_log "github.com/grafana/loki/pkg/util/log"
    20  )
    21  
    22  const (
    23  	uploadIndexSetsConcurrency = 10
    24  	gzipExtension              = ".gz"
    25  )
    26  
    27  var errRetentionFileCountNotOne = fmt.Errorf("can't apply retention when index file count is not one")
    28  
    29  type tableExpirationChecker interface {
    30  	IntervalMayHaveExpiredChunks(interval model.Interval, userID string) bool
    31  }
    32  
    33  type IndexCompactor interface {
    34  	// NewTableCompactor returns a new TableCompactor for compacting a table.
    35  	// commonIndexSet refers to common index files or in other words multi-tenant index.
    36  	// existingUserIndexSet refers to existing user specific index files in the storage.
    37  	// makeEmptyUserIndexSetFunc can be used for creating an empty indexSet for a user
    38  	// who does not have an index for it in existingUserIndexSet.
    39  	// periodConfig holds the PeriodConfig for the table.
    40  	NewTableCompactor(
    41  		ctx context.Context,
    42  		commonIndexSet IndexSet,
    43  		existingUserIndexSet map[string]IndexSet,
    44  		makeEmptyUserIndexSetFunc MakeEmptyUserIndexSetFunc,
    45  		periodConfig config.PeriodConfig,
    46  	) TableCompactor
    47  
    48  	// OpenCompactedIndexFile opens a compressed index file at given path.
    49  	OpenCompactedIndexFile(
    50  		ctx context.Context,
    51  		path,
    52  		tableName,
    53  		userID,
    54  		workingDir string,
    55  		periodConfig config.PeriodConfig,
    56  		logger log.Logger,
    57  	) (
    58  		CompactedIndex,
    59  		error,
    60  	)
    61  }
    62  
    63  type TableCompactor interface {
    64  	// CompactTable compacts the table.
    65  	// After compaction is done successfully, it should set the new/updated CompactedIndex for relevant IndexSets.
    66  	CompactTable() (err error)
    67  }
    68  
    69  type MakeEmptyUserIndexSetFunc func(userID string) (IndexSet, error)
    70  
    71  type table struct {
    72  	name               string
    73  	workingDirectory   string
    74  	indexStorageClient storage.Client
    75  	indexCompactor     IndexCompactor
    76  	tableMarker        retention.TableMarker
    77  	expirationChecker  tableExpirationChecker
    78  	periodConfig       config.PeriodConfig
    79  
    80  	baseUserIndexSet, baseCommonIndexSet storage.IndexSet
    81  
    82  	indexSets             map[string]*indexSet
    83  	usersWithPerUserIndex []string
    84  	logger                log.Logger
    85  
    86  	ctx context.Context
    87  }
    88  
    89  func newTable(ctx context.Context, workingDirectory string, indexStorageClient storage.Client,
    90  	indexCompactor IndexCompactor, periodConfig config.PeriodConfig,
    91  	tableMarker retention.TableMarker, expirationChecker tableExpirationChecker,
    92  ) (*table, error) {
    93  	err := chunk_util.EnsureDirectory(workingDirectory)
    94  	if err != nil {
    95  		return nil, err
    96  	}
    97  
    98  	table := table{
    99  		ctx:                ctx,
   100  		name:               filepath.Base(workingDirectory),
   101  		workingDirectory:   workingDirectory,
   102  		indexStorageClient: indexStorageClient,
   103  		indexCompactor:     indexCompactor,
   104  		tableMarker:        tableMarker,
   105  		expirationChecker:  expirationChecker,
   106  		periodConfig:       periodConfig,
   107  		indexSets:          map[string]*indexSet{},
   108  		baseUserIndexSet:   storage.NewIndexSet(indexStorageClient, true),
   109  		baseCommonIndexSet: storage.NewIndexSet(indexStorageClient, false),
   110  	}
   111  	table.logger = log.With(util_log.Logger, "table-name", table.name)
   112  
   113  	return &table, nil
   114  }
   115  
   116  func (t *table) compact(applyRetention bool) error {
   117  	indexFiles, usersWithPerUserIndex, err := t.indexStorageClient.ListFiles(t.ctx, t.name, false)
   118  	if err != nil {
   119  		return err
   120  	}
   121  
   122  	if len(indexFiles) == 0 && len(usersWithPerUserIndex) == 0 {
   123  		level.Info(t.logger).Log("msg", "no common index files and user index found")
   124  		return nil
   125  	}
   126  
   127  	t.usersWithPerUserIndex = usersWithPerUserIndex
   128  
   129  	level.Info(t.logger).Log("msg", "listed files", "count", len(indexFiles))
   130  
   131  	defer func() {
   132  		for _, is := range t.indexSets {
   133  			is.cleanup()
   134  		}
   135  
   136  		if err := os.RemoveAll(t.workingDirectory); err != nil {
   137  			level.Error(t.logger).Log("msg", fmt.Sprintf("failed to remove working directory %s", t.workingDirectory), "err", err)
   138  		}
   139  	}()
   140  
   141  	t.indexSets[""], err = newCommonIndexSet(t.ctx, t.name, t.baseCommonIndexSet, t.workingDirectory, t.logger)
   142  	if err != nil {
   143  		return err
   144  	}
   145  
   146  	// userIndexSets is just for passing it to NewTableCompactor since go considers map[string]*indexSet different type than map[string]IndexSet
   147  	userIndexSets := make(map[string]IndexSet, len(t.usersWithPerUserIndex))
   148  
   149  	for _, userID := range t.usersWithPerUserIndex {
   150  		var err error
   151  		t.indexSets[userID], err = newUserIndexSet(t.ctx, t.name, userID, t.baseUserIndexSet, filepath.Join(t.workingDirectory, userID), t.logger)
   152  		if err != nil {
   153  			return err
   154  		}
   155  		userIndexSets[userID] = t.indexSets[userID]
   156  	}
   157  
   158  	// protect indexSets with mutex so that we are concurrency safe if the TableCompactor calls MakeEmptyUserIndexSetFunc concurrently
   159  	indexSetsMtx := sync.Mutex{}
   160  	tableCompactor := t.indexCompactor.NewTableCompactor(t.ctx, t.indexSets[""], userIndexSets, func(userID string) (IndexSet, error) {
   161  		indexSetsMtx.Lock()
   162  		defer indexSetsMtx.Unlock()
   163  
   164  		var err error
   165  		t.indexSets[userID], err = newUserIndexSet(t.ctx, t.name, userID, t.baseUserIndexSet, filepath.Join(t.workingDirectory, userID), t.logger)
   166  		return t.indexSets[userID], err
   167  	}, t.periodConfig)
   168  
   169  	err = tableCompactor.CompactTable()
   170  	if err != nil {
   171  		return err
   172  	}
   173  
   174  	if applyRetention {
   175  		err := t.applyRetention()
   176  		if err != nil {
   177  			return err
   178  		}
   179  	}
   180  
   181  	return t.done()
   182  }
   183  
   184  func (t *table) done() error {
   185  	userIDs := make([]string, 0, len(t.indexSets))
   186  	for userID := range t.indexSets {
   187  		// indexSet.done() uploads the compacted db and cleans up the source index files.
   188  		// For user index sets, the files from common index sets are also a source of index.
   189  		// if we cleanup common index sets first, and we fail to upload newly compacted dbs in user index sets, then we will lose data.
   190  		// To avoid any data loss, we should call done() on common index sets at the end.
   191  		if userID == "" {
   192  			continue
   193  		}
   194  
   195  		userIDs = append(userIDs, userID)
   196  	}
   197  
   198  	err := concurrency.ForEachJob(t.ctx, len(userIDs), uploadIndexSetsConcurrency, func(ctx context.Context, idx int) error {
   199  		return t.indexSets[userIDs[idx]].done()
   200  	})
   201  	if err != nil {
   202  		return err
   203  	}
   204  
   205  	if commonIndexSet, ok := t.indexSets[""]; ok {
   206  		if err := commonIndexSet.done(); err != nil {
   207  			return err
   208  		}
   209  	}
   210  
   211  	return nil
   212  }
   213  
   214  // applyRetention applies retention on the index sets
   215  func (t *table) applyRetention() error {
   216  	tableInterval := retention.ExtractIntervalFromTableName(t.name)
   217  	// call runRetention on the index sets which may have expired chunks
   218  	for userID, is := range t.indexSets {
   219  		// make sure we do not apply retention on common index set which got compacted away to per-user index
   220  		if userID == "" && is.compactedIndex == nil && is.removeSourceObjects && !is.uploadCompactedDB {
   221  			continue
   222  		}
   223  
   224  		if !t.expirationChecker.IntervalMayHaveExpiredChunks(tableInterval, userID) {
   225  			continue
   226  		}
   227  
   228  		// compactedIndex is only set in indexSet when files have been compacted,
   229  		// so we need to open the compacted index file for applying retention if compactedIndex is nil
   230  		if is.compactedIndex == nil && len(is.ListSourceFiles()) == 1 {
   231  			if err := t.openCompactedIndexForRetention(is); err != nil {
   232  				return err
   233  			}
   234  		}
   235  
   236  		err := is.runRetention(t.tableMarker)
   237  		if err != nil {
   238  			return err
   239  		}
   240  	}
   241  
   242  	return nil
   243  }
   244  
   245  func (t *table) openCompactedIndexForRetention(idxSet *indexSet) error {
   246  	sourceFiles := idxSet.ListSourceFiles()
   247  	if len(sourceFiles) != 1 {
   248  		return errRetentionFileCountNotOne
   249  	}
   250  
   251  	downloadedAt, err := idxSet.GetSourceFile(sourceFiles[0])
   252  	if err != nil {
   253  		return err
   254  	}
   255  
   256  	compactedIndexFile, err := t.indexCompactor.OpenCompactedIndexFile(t.ctx, downloadedAt, t.name, idxSet.userID, filepath.Join(t.workingDirectory, idxSet.userID), t.periodConfig, idxSet.logger)
   257  	if err != nil {
   258  		return err
   259  	}
   260  
   261  	idxSet.setCompactedIndex(compactedIndexFile, false, false)
   262  
   263  	return nil
   264  }