github.com/thanos-io/thanos@v0.32.5/pkg/compactv2/compactor.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package compactv2

import (
	"context"
	"fmt"
	"io"
	"strings"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/chunkenc"
	tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
	"github.com/prometheus/prometheus/tsdb/index"

	"github.com/thanos-io/thanos/pkg/block"
)

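// ProgressLogger is notified every time a series has been processed.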
type ProgressLogger interface {
	SeriesProcessed()
}

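// progressLogger implements ProgressLogger by counting processed series against an
// expected total and logging the progress made so far.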
type progressLogger struct {
	logger log.Logger

	series    int
	processed int
}

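// NewProgressLogger returns a progressLogger for the given total number of series.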
func NewProgressLogger(logger log.Logger, series int) *progressLogger {
	return &progressLogger{logger: logger, series: series}
}

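// SeriesProcessed increments the processed counter and logs progress roughly every
// 10% of the expected series, or on every series when fewer than ten are expected.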
func (p *progressLogger) SeriesProcessed() {
	p.processed++
	if (p.series/10) == 0 || p.processed%(p.series/10) == 0 {
		level.Info(p.logger).Log("msg", fmt.Sprintf("processed %0.2f%% of %v series", 100*(float64(p.processed)/float64(p.series)), p.series))
	}
}

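// Compactor rewrites blocks, optionally applying modifiers to their series.
// In dry-run mode it only iterates the (modified) series without writing anything.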
type Compactor struct {
	tmpDir string
	logger log.Logger

	chunkPool    chunkenc.Pool
	changeLogger ChangeLogger

	dryRun bool
}

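// seriesReader bundles the index and chunk readers of a single block.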
type seriesReader struct {
	ir tsdb.IndexReader
	cr tsdb.ChunkReader
}

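// New returns a Compactor that compacts and rewrites blocks using the given change
// logger and chunk pool.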
func New(tmpDir string, logger log.Logger, changeLogger ChangeLogger, pool chunkenc.Pool) *Compactor {
	return &Compactor{
		tmpDir:       tmpDir,
		logger:       logger,
		changeLogger: changeLogger,
		chunkPool:    pool,
	}
}

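// NewDryRun returns a Compactor that only simulates compaction: series are read and
// modified, but the result is never written.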
func NewDryRun(tmpDir string, logger log.Logger, changeLogger ChangeLogger, pool chunkenc.Pool) *Compactor {
	s := New(tmpDir, logger, changeLogger, pool)
	s.dryRun = true
	return s
}

// TODO(bwplotka): Upstream this.
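//
// WriteSeries merges the series of the given block readers, applies the modifiers, and
// writes the result through sWriter, reporting progress via p. In dry-run mode the merged
// series are only iterated so that modifiers can report the changes they would make.
//
// A minimal usage sketch; ctx, tmpDir, logger, changeLog, numSeries, readers and writer
// are placeholders that depend on the caller's setup:
//
//	c := compactv2.New(tmpDir, logger, changeLog, chunkenc.NewPool())
//	p := compactv2.NewProgressLogger(logger, numSeries)
//	if err := c.WriteSeries(ctx, readers, writer, p); err != nil {
//		// handle error
//	}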
func (w *Compactor) WriteSeries(ctx context.Context, readers []block.Reader, sWriter block.Writer, p ProgressLogger, modifiers ...Modifier) (err error) {
	if len(readers) == 0 {
		return errors.New("cannot write from no readers")
	}

	var (
		sReaders []seriesReader
		closers  []io.Closer
	)
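	// Make sure all opened index and chunk readers are closed, combining any close error
	// with the error returned by WriteSeries.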
	defer func() {
		errs := tsdb_errors.NewMulti(err)
		if cerr := tsdb_errors.CloseAll(closers); cerr != nil {
			errs.Add(errors.Wrap(cerr, "close"))
		}
		err = errs.Err()
	}()

	for _, b := range readers {
		indexr, err := b.Index()
		if err != nil {
			return errors.Wrapf(err, "open index reader for block %+v", b.Meta())
		}
		closers = append(closers, indexr)

		chunkr, err := b.Chunks()
		if err != nil {
			return errors.Wrapf(err, "open chunk reader for block %+v", b.Meta())
		}
		closers = append(closers, chunkr)
		sReaders = append(sReaders, seriesReader{ir: indexr, cr: chunkr})
	}

	symbols, set, err := compactSeries(ctx, sReaders...)
	if err != nil {
		return errors.Wrapf(err, "compact series from %v", func() string {
			var metas []string
			for _, m := range readers {
				metas = append(metas, fmt.Sprintf("%v", m.Meta()))
			}
			return strings.Join(metas, ",")
		}())
	}
	for _, m := range modifiers {
		symbols, set = m.Modify(symbols, set, w.changeLogger, p)
	}

	if w.dryRun {
		// Even for a dry run, we need to exhaust the iterators to see potential changes.
		for set.Next() {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}

			s := set.At()
			iter := s.Iterator(nil)
			for iter.Next() {
			}
			if err := iter.Err(); err != nil {
				level.Error(w.logger).Log("msg", "error while iterating over chunks", "series", s.Labels(), "err", err)
			}
			p.SeriesProcessed()
		}
		if err := set.Err(); err != nil {
			level.Error(w.logger).Log("msg", "error while iterating over set", "err", err)
		}
		return nil
	}

	if err := w.write(ctx, symbols, set, sWriter, p); err != nil {
		return errors.Wrap(err, "write")
	}
	return nil
}

// compactSeries compacts the blocks' series into a merged symbol iterator and a single
// ChunkSeriesSet whose chunks are populated lazily.
func compactSeries(ctx context.Context, sReaders ...seriesReader) (symbols index.StringIter, set storage.ChunkSeriesSet, _ error) {
	if len(sReaders) == 0 {
		return nil, nil, errors.New("cannot populate block from no readers")
	}

	var sets []storage.ChunkSeriesSet
	for i, r := range sReaders {
		select {
		case <-ctx.Done():
			return nil, nil, ctx.Err()
		default:
		}

		k, v := index.AllPostingsKey()
		all, err := r.ir.Postings(k, v)
		if err != nil {
			return nil, nil, err
		}
		all = r.ir.SortedPostings(all)
		syms := r.ir.Symbols()
		sets = append(sets, newLazyPopulateChunkSeriesSet(r, all))
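		// The first reader seeds the symbol iterator and the series set; symbols from
		// subsequent readers are merged in here, their series sets below.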
		if i == 0 {
			symbols = syms
			set = sets[0]
			continue
		}
		symbols = tsdb.NewMergedStringIter(symbols, syms)
	}

	if len(sets) <= 1 {
		return symbols, set, nil
	}
	// Merge series using compacting chunk series merger.
	return symbols, storage.NewMergeChunkSeriesSet(sets, storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge)), nil
}