github.com/thanos-io/thanos@v0.32.5/pkg/compactv2/compactor.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package compactv2

import (
	"context"
	"fmt"
	"io"
	"strings"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/chunkenc"
	tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
	"github.com/prometheus/prometheus/tsdb/index"

	"github.com/thanos-io/thanos/pkg/block"
)

// ProgressLogger is notified once per processed series so that
// implementations can report rewrite progress.
type ProgressLogger interface {
	SeriesProcessed()
}

type progressLogger struct {
	logger log.Logger

	series    int
	processed int
}

// NewProgressLogger returns a ProgressLogger that logs an info line roughly
// every 10% of the given total series count.
func NewProgressLogger(logger log.Logger, series int) *progressLogger {
	return &progressLogger{logger: logger, series: series}
}

func (p *progressLogger) SeriesProcessed() {
	p.processed++
	if (p.series/10) == 0 || p.processed%(p.series/10) == 0 {
		level.Info(p.logger).Log("msg", fmt.Sprintf("processed %0.2f%s of %v series", 100*(float64(p.processed)/float64(p.series)), "%", p.series))
	}
}

// Compactor rewrites TSDB blocks, optionally applying modifiers to the series
// set, and reports the resulting changes through the configured ChangeLogger.
type Compactor struct {
	tmpDir string
	logger log.Logger

	chunkPool    chunkenc.Pool
	changeLogger ChangeLogger

	dryRun bool
}

type seriesReader struct {
	ir tsdb.IndexReader
	cr tsdb.ChunkReader
}

// New returns a Compactor that writes rewritten blocks.
func New(tmpDir string, logger log.Logger, changeLogger ChangeLogger, pool chunkenc.Pool) *Compactor {
	return &Compactor{
		tmpDir:       tmpDir,
		logger:       logger,
		changeLogger: changeLogger,
		chunkPool:    pool,
	}
}

// NewDryRun returns a Compactor that only iterates the input and logs the
// would-be changes without writing any output block.
func NewDryRun(tmpDir string, logger log.Logger, changeLogger ChangeLogger, pool chunkenc.Pool) *Compactor {
	s := New(tmpDir, logger, changeLogger, pool)
	s.dryRun = true
	return s
}

// WriteSeries compacts the series from the given readers into sWriter,
// applying the given modifiers in order.
// TODO(bwplotka): Upstream this.
func (w *Compactor) WriteSeries(ctx context.Context, readers []block.Reader, sWriter block.Writer, p ProgressLogger, modifiers ...Modifier) (err error) {
	if len(readers) == 0 {
		return errors.New("cannot write from no readers")
	}

	var (
		sReaders []seriesReader
		closers  []io.Closer
	)
	defer func() {
		errs := tsdb_errors.NewMulti(err)
		if cerr := tsdb_errors.CloseAll(closers); cerr != nil {
			errs.Add(errors.Wrap(cerr, "close"))
		}
		err = errs.Err()
	}()

	for _, b := range readers {
		indexr, err := b.Index()
		if err != nil {
			return errors.Wrapf(err, "open index reader for block %+v", b.Meta())
		}
		closers = append(closers, indexr)

		chunkr, err := b.Chunks()
		if err != nil {
			return errors.Wrapf(err, "open chunk reader for block %+v", b.Meta())
		}
		closers = append(closers, chunkr)
		sReaders = append(sReaders, seriesReader{ir: indexr, cr: chunkr})
	}

	symbols, set, err := compactSeries(ctx, sReaders...)
	if err != nil {
		return errors.Wrapf(err, "compact series from %v", func() string {
			var metas []string
			for _, m := range readers {
				metas = append(metas, fmt.Sprintf("%v", m.Meta()))
			}
			return strings.Join(metas, ",")
		}())
	}

	for _, m := range modifiers {
		symbols, set = m.Modify(symbols, set, w.changeLogger, p)
	}

	if w.dryRun {
		// Even for dry run, we need to exhaust iterators to see potential changes.
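		// Modifiers are applied lazily: the wrapped set only surfaces changes
		// (and the ChangeLogger only records them) as the series and chunk
		// iterators are consumed, so draining everything below yields the full
		// change report without producing an output block.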
		for set.Next() {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}

			s := set.At()
			iter := s.Iterator(nil)
			for iter.Next() {
			}
			if err := iter.Err(); err != nil {
				level.Error(w.logger).Log("msg", "error while iterating over chunks", "series", s.Labels(), "err", err)
			}
			p.SeriesProcessed()
		}
		if err := set.Err(); err != nil {
			level.Error(w.logger).Log("msg", "error while iterating over set", "err", err)
		}
		return nil
	}

	if err := w.write(ctx, symbols, set, sWriter, p); err != nil {
		return errors.Wrap(err, "write")
	}
	return nil
}

// compactSeries compacts blocks' series into symbols and one ChunkSeriesSet with lazy populating chunks.
func compactSeries(ctx context.Context, sReaders ...seriesReader) (symbols index.StringIter, set storage.ChunkSeriesSet, _ error) {
	if len(sReaders) == 0 {
		return nil, nil, errors.New("cannot populate block from no readers")
	}

	var sets []storage.ChunkSeriesSet
	for i, r := range sReaders {
		select {
		case <-ctx.Done():
			return nil, nil, ctx.Err()
		default:
		}

		k, v := index.AllPostingsKey()
		all, err := r.ir.Postings(k, v)
		if err != nil {
			return nil, nil, err
		}
		all = r.ir.SortedPostings(all)
		syms := r.ir.Symbols()
		sets = append(sets, newLazyPopulateChunkSeriesSet(r, all))
		if i == 0 {
			symbols = syms
			set = sets[0]
			continue
		}
		symbols = tsdb.NewMergedStringIter(symbols, syms)
	}

	if len(sets) <= 1 {
		return symbols, set, nil
	}
	// Merge series using compacting chunk series merger.
	return symbols, storage.NewMergeChunkSeriesSet(sets, storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge)), nil
}
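
// exampleDryRunRewrite is an illustrative sketch, not part of the upstream
// file: one plausible way to drive the API above for a dry-run pass over a
// single block. The function name and the "/tmp/thanos-rewrite" path are
// hypothetical; the reader b and writer w are assumed to be supplied by the
// caller (pkg/block defines the interfaces, and how they are opened is out of
// scope here), and reading the total series count from b.Meta().Stats is an
// assumption about the block meta contents.
func exampleDryRunRewrite(ctx context.Context, logger log.Logger, changeLog ChangeLogger, b block.Reader, w block.Writer) error {
	// A dry-run Compactor drains the (possibly modified) series set and
	// reports changes via changeLog, but never writes an output block.
	c := NewDryRun("/tmp/thanos-rewrite", logger, changeLog, chunkenc.NewPool())

	// Progress is logged roughly every 10% of the block's series.
	p := NewProgressLogger(logger, int(b.Meta().Stats.NumSeries))

	// No modifiers passed: this only verifies the block reads end to end.
	return c.WriteSeries(ctx, []block.Reader{b}, w, p)
}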