github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/compactor/index_set.go

package compactor

import (
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"

	"github.com/grafana/loki/pkg/chunkenc"
	"github.com/grafana/loki/pkg/storage/chunk/client/util"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/retention"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/storage"
	util_log "github.com/grafana/loki/pkg/util/log"
)

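// IndexSet is the interface the table compactor works against for a single set of
// index files (either the common index or a per-user index).
//
// A rough usage sketch (hypothetical caller code; building the CompactedIndex from
// the downloaded files is elided):
//
//	for _, file := range idxSet.ListSourceFiles() {
//		path, err := idxSet.GetSourceFile(file)
//		// open path and feed it into the compaction...
//	}
//	// once a new compacted index has been built:
//	err := idxSet.SetCompactedIndex(compactedIndex, true)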
type IndexSet interface {
	GetTableName() string
	ListSourceFiles() []storage.IndexFile
	GetSourceFile(indexFile storage.IndexFile) (string, error)
	GetLogger() log.Logger
	GetWorkingDir() string
	// SetCompactedIndex sets the CompactedIndex, which is used for uploading the compacted index, applying retention,
	// and letting the compactor remove the source files.
	// CompactedIndex may be nil only when all the source files in the common index set were compacted away to per-tenant indexes.
	// For a user index set, an error is returned if the CompactedIndex is nil while removeSourceFiles is true, since
	// compaction should either create new files or be a noop when there is nothing to compact.
	// There is no need to call SetCompactedIndex if no changes were made to the index for this IndexSet.
	SetCompactedIndex(compactedIndex CompactedIndex, removeSourceFiles bool) error
}

// CompactedIndex is built by the TableCompactor for an IndexSet after compaction.
// It is used for:
// 1. applying custom retention and processing delete requests using IndexProcessor
// 2. uploading the compacted index to storage after converting it to an index.Index using ToIndexFile
// After all the operations are done successfully, or in case of a failure, Cleanup is called to clean up the state.
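//
// The upload path, roughly (see upload below; error handling elided, names are illustrative):
//
//	idx, _ := compactedIndex.ToIndexFile()
//	reader, _ := idx.Reader()
//	// gzip and upload reader to the object store, then:
//	idx.Close()
//	os.Remove(idx.Path())
//	compactedIndex.Cleanup()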
type CompactedIndex interface {
	// IndexProcessor is used for applying custom retention and processing delete requests.
	retention.IndexProcessor
	// Cleanup should clean up all the state built during compaction.
	// It is typically called at the end or in case of an error.
	Cleanup()
	// ToIndexFile is used to convert the CompactedIndex to an IndexFile for uploading to the object store.
	// Once the IndexFile is uploaded using Index.Reader, the file is closed using Index.Close and removed from disk using Index.Path.
	ToIndexFile() (index.Index, error)
}

// indexSet helps with operations on a set of index files belonging to a single user, or on the common index files shared by users.
type indexSet struct {
	ctx               context.Context
	tableName, userID string
	workingDir        string
	baseIndexSet      storage.IndexSet

	uploadCompactedDB   bool
	removeSourceObjects bool

	compactedIndex CompactedIndex
	sourceObjects  []storage.IndexFile
	logger         log.Logger
}

// newUserIndexSet initializes a new index set for a user index.
func newUserIndexSet(ctx context.Context, tableName, userID string, baseUserIndexSet storage.IndexSet, workingDir string, logger log.Logger) (*indexSet, error) {
	if !baseUserIndexSet.IsUserBasedIndexSet() {
		return nil, fmt.Errorf("base index set is not for user index")
	}

	return newIndexSet(ctx, tableName, userID, baseUserIndexSet, workingDir, log.With(logger, "user-id", userID))
}

// newCommonIndexSet initializes a new index set for the common index.
func newCommonIndexSet(ctx context.Context, tableName string, baseUserIndexSet storage.IndexSet, workingDir string, logger log.Logger) (*indexSet, error) {
	if baseUserIndexSet.IsUserBasedIndexSet() {
		return nil, fmt.Errorf("base index set is not for common index")
	}

	return newIndexSet(ctx, tableName, "", baseUserIndexSet, workingDir, logger)
}

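// newIndexSet initializes an indexSet for the given table and user (an empty userID means the common index),
// ensuring the working directory exists and listing the source index files from storage.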
func newIndexSet(ctx context.Context, tableName, userID string, baseIndexSet storage.IndexSet, workingDir string, logger log.Logger) (*indexSet, error) {
	if err := util.EnsureDirectory(workingDir); err != nil {
		return nil, err
	}

	ui := &indexSet{
		ctx:          ctx,
		tableName:    tableName,
		userID:       userID,
		workingDir:   workingDir,
		baseIndexSet: baseIndexSet,
		logger:       logger,
	}

	if userID != "" {
		ui.logger = log.With(logger, "user-id", userID)
	}

	var err error
	ui.sourceObjects, err = ui.baseIndexSet.ListFiles(ui.ctx, ui.tableName, ui.userID, false)
	if err != nil {
		return nil, err
	}

	return ui, nil
}

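// GetTableName returns the name of the table this index set belongs to.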
func (is *indexSet) GetTableName() string {
	return is.tableName
}

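// GetWorkingDir returns the local directory used for downloading and building index files.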
func (is *indexSet) GetWorkingDir() string {
	return is.workingDir
}

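// ListSourceFiles returns the source index files in storage for this index set.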
func (is *indexSet) ListSourceFiles() []storage.IndexFile {
	return is.sourceObjects
}

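// GetSourceFile downloads the given source index file to the working directory, decompressing it if needed,
// and returns the path of the local copy.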
func (is *indexSet) GetSourceFile(indexFile storage.IndexFile) (string, error) {
	decompress := storage.IsCompressedFile(indexFile.Name)
	dst := filepath.Join(is.workingDir, indexFile.Name)
	if decompress {
		dst = strings.TrimSuffix(dst, gzipExtension)
	}

	err := storage.DownloadFileFromStorage(dst, decompress,
		false, storage.LoggerWithFilename(is.logger, indexFile.Name),
		func() (io.ReadCloser, error) {
			return is.baseIndexSet.GetFile(is.ctx, is.tableName, is.userID, indexFile.Name)
		})
	if err != nil {
		return "", err
	}

	return dst, nil
}

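// GetLogger returns the logger scoped to this index set.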
func (is *indexSet) GetLogger() log.Logger {
	return is.logger
}

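// SetCompactedIndex records the result of compaction for this index set; see the IndexSet interface for the contract.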
func (is *indexSet) SetCompactedIndex(compactedIndex CompactedIndex, removeSourceFiles bool) error {
	if compactedIndex == nil && removeSourceFiles && is.userID != "" {
		return errors.New("compacted index can't be nil when remove source files is true for user index set")
	}

	is.setCompactedIndex(compactedIndex, compactedIndex != nil, removeSourceFiles)
	return nil
}

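// setCompactedIndex records the compaction result along with the follow-up actions for done():
// whether to upload the compacted index and whether to remove the source objects from storage.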
func (is *indexSet) setCompactedIndex(compactedIndex CompactedIndex, uploadCompactedDB, removeSourceObjects bool) {
	is.compactedIndex = compactedIndex
	is.uploadCompactedDB = uploadCompactedDB
	is.removeSourceObjects = removeSourceObjects
}

// runRetention applies retention to the index set using the given TableMarker.
func (is *indexSet) runRetention(tableMarker retention.TableMarker) error {
	if is.compactedIndex == nil {
		return nil
	}

	empty, modified, err := tableMarker.MarkForDelete(is.ctx, is.tableName, is.userID, is.compactedIndex, is.logger)
	if err != nil {
		return err
	}

	if empty {
		is.uploadCompactedDB = false
		is.removeSourceObjects = true
	} else if modified {
		is.uploadCompactedDB = true
		is.removeSourceObjects = true
	}

	return nil
}

// upload uploads the compacted index in compressed format.
func (is *indexSet) upload() error {
	if is.compactedIndex == nil {
		return errors.New("can't upload nil or empty compacted index")
	}

	// ToDo(Sandeep): move index uploading to a common function and share it with generic index-shipper
	idx, err := is.compactedIndex.ToIndexFile()
	if err != nil {
		return err
	}

	defer func() {
		filePath := idx.Path()

		if err := idx.Close(); err != nil {
			level.Error(is.logger).Log("msg", "failed to close indexFile", "err", err)
			return
		}

		if err := os.Remove(filePath); err != nil {
			level.Error(is.logger).Log("msg", "failed to remove indexFile", "err", err)
			return
		}
	}()

	fileName := idx.Name()
	level.Debug(is.logger).Log("msg", fmt.Sprintf("uploading index %s", fileName))

	idxPath := idx.Path()

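	// compress the index into a temporary file next to the original before uploading it to the object store.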
	filePath := fmt.Sprintf("%s%s", idxPath, ".temp")
	f, err := os.Create(filePath)
	if err != nil {
		return err
	}

	defer func() {
		if err := f.Close(); err != nil {
			level.Error(util_log.Logger).Log("msg", "failed to close temp file", "path", filePath, "err", err)
		}

		if err := os.Remove(filePath); err != nil {
			level.Error(util_log.Logger).Log("msg", "failed to remove temp file", "path", filePath, "err", err)
		}
	}()

	compressedWriter := chunkenc.Gzip.GetWriter(f)
	defer chunkenc.Gzip.PutWriter(compressedWriter)

	idxReader, err := idx.Reader()
	if err != nil {
		return err
	}

	_, err = idxReader.Seek(0, io.SeekStart)
	if err != nil {
		return err
	}

	_, err = io.Copy(compressedWriter, idxReader)
	if err != nil {
		return err
	}

	err = compressedWriter.Close()
	if err != nil {
		return err
	}

	// flush the file to disk and seek the file to the beginning.
	if err := f.Sync(); err != nil {
		return err
	}

	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return err
	}

	return is.baseIndexSet.PutFile(is.ctx, is.tableName, is.userID, fmt.Sprintf("%s.gz", fileName), f)
}

// removeFilesFromStorage deletes source objects from storage.
func (is *indexSet) removeFilesFromStorage() error {
	level.Info(is.logger).Log("msg", "removing source db files from storage", "count", len(is.sourceObjects))

	for _, object := range is.sourceObjects {
		err := is.baseIndexSet.DeleteFile(is.ctx, is.tableName, is.userID, object.Name)
		if err != nil {
			return err
		}
	}

	return nil
}

// done takes care of the storage operations, which include:
// - uploading the compacted index if required.
// - removing the source objects from storage if required.
func (is *indexSet) done() error {
	if is.uploadCompactedDB {
		if err := is.upload(); err != nil {
			return err
		}
	}

	if is.removeSourceObjects {
		return is.removeFilesFromStorage()
	}

	return nil
}

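// cleanup releases any state built during compaction by the CompactedIndex, if there is one.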
func (is *indexSet) cleanup() {
	if is.compactedIndex == nil {
		return
	}
	is.compactedIndex.Cleanup()
}