github.com/thanos-io/thanos@v0.32.5/pkg/compactv2/modifiers.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package compactv2

import (
	"math"
	"sort"

	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/model/histogram"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/relabel"
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/chunkenc"
	"github.com/prometheus/prometheus/tsdb/chunks"
	"github.com/prometheus/prometheus/tsdb/index"
	"github.com/prometheus/prometheus/tsdb/tombstones"

	"github.com/thanos-io/thanos/pkg/block/metadata"
)

type Modifier interface {
	Modify(sym index.StringIter, set storage.ChunkSeriesSet, log ChangeLogger, p ProgressLogger) (index.StringIter, storage.ChunkSeriesSet)
}

type DeletionModifier struct {
	deletions []metadata.DeletionRequest
}

func WithDeletionModifier(deletions ...metadata.DeletionRequest) *DeletionModifier {
	return &DeletionModifier{deletions: deletions}
}
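
// Illustrative sketch, not part of the original file: building a
// DeletionModifier that drops a hypothetical metric within a time range.
// The matcher and interval values are made up; DeletionRequest is assumed to
// expose the Matchers and Intervals fields that delModifierSeriesSet.Next
// below relies on.
func exampleDeletionModifier() *DeletionModifier {
	return WithDeletionModifier(metadata.DeletionRequest{
		Matchers: []*labels.Matcher{
			// Every matcher must match for a series to be affected.
			labels.MustNewMatcher(labels.MatchEqual, "__name__", "http_requests_total"),
		},
		// Delete only samples in this (hypothetical) millisecond range;
		// an empty Intervals slice deletes the whole series.
		Intervals: tombstones.Intervals{{Mint: 0, Maxt: 3600000}},
	})
}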

func (d *DeletionModifier) Modify(sym index.StringIter, set storage.ChunkSeriesSet, log ChangeLogger, p ProgressLogger) (index.StringIter, storage.ChunkSeriesSet) {
	// TODO(bwplotka): Modify symbols as well. Otherwise large strings will be kept forever.
	// This is, however, what Prometheus already does, and it does not increase the index size too much.
	// This needs a bit of work due to the sorting and tracking required to rebuild them.

	return sym, &delModifierSeriesSet{
		d: d,

		ChunkSeriesSet: set,
		log:            log,
		p:              p,
	}
}

type delModifierSeriesSet struct {
	storage.ChunkSeriesSet

	d   *DeletionModifier
	log ChangeLogger
	p   ProgressLogger

	curr *storage.ChunkSeriesEntry
	err  error
}

func (d *delModifierSeriesSet) Next() bool {
SeriesLoop:
	for d.ChunkSeriesSet.Next() {
		s := d.ChunkSeriesSet.At()
		lbls := s.Labels()

		var intervals tombstones.Intervals
	DeletionsLoop:
		for _, deletions := range d.d.deletions {
			for _, m := range deletions.Matchers {
				v := lbls.Get(m.Name)

				// Only if all matchers in the deletion request are matched can we proceed to deletion.
				if v == "" || !m.Matches(v) {
					continue DeletionsLoop
				}
			}
			if len(deletions.Intervals) > 0 {
				for _, in := range deletions.Intervals {
					intervals = intervals.Add(in)
				}
				continue
			}

			// Special case: Delete whole series.
			chksIter := s.Iterator(nil)
			var chks []chunks.Meta
			for chksIter.Next() {
				chks = append(chks, chksIter.At())
			}
			if d.err = chksIter.Err(); d.err != nil {
				return false
			}

			var deleted tombstones.Intervals
			if len(chks) > 0 {
				deleted = deleted.Add(tombstones.Interval{Mint: chks[0].MinTime, Maxt: chks[len(chks)-1].MaxTime})
			}
			d.log.DeleteSeries(lbls, deleted)
			d.p.SeriesProcessed()
			continue SeriesLoop
		}

		d.curr = &storage.ChunkSeriesEntry{
			Lset: lbls,
			ChunkIteratorFn: func(it chunks.Iterator) chunks.Iterator {
				return NewDelGenericSeriesIterator(s.Iterator(it), intervals, func(intervals tombstones.Intervals) {
					d.log.DeleteSeries(lbls, intervals)
				}).ToChunkSeriesIterator()
			},
		}
		return true
	}
	return false
}

// intersection returns the intersection between an interval and a range of intervals.
func intersection(i tombstones.Interval, dranges tombstones.Intervals) tombstones.Intervals {
	var ret tombstones.Intervals
	for _, r := range dranges {
		isLeftIn := r.Mint <= i.Maxt
		isRightIn := i.Mint <= r.Maxt
		if !isLeftIn || !isRightIn {
			continue
		}
		intersection := tombstones.Interval{Mint: r.Mint, Maxt: r.Maxt}
		if intersection.Mint < i.Mint {
			intersection.Mint = i.Mint
		}
		if intersection.Maxt > i.Maxt {
			intersection.Maxt = i.Maxt
		}
		ret = ret.Add(intersection)
	}
	return ret
}
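
// Illustrative sketch, not part of the original file: intersection clamps
// deletion ranges to a chunk's time range. Clamping [10, 30] against
// {[0, 15], [25, 40]} yields {[10, 15], [25, 30]}; a range such as [35, 50]
// would be dropped because it does not overlap [10, 30] at all.
func exampleIntersection() tombstones.Intervals {
	return intersection(
		tombstones.Interval{Mint: 10, Maxt: 30},
		tombstones.Intervals{{Mint: 0, Maxt: 15}, {Mint: 25, Maxt: 40}},
	)
}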

func (d *delModifierSeriesSet) At() storage.ChunkSeries {
	return d.curr
}

func (d *delModifierSeriesSet) Err() error {
	if d.err != nil {
		return d.err
	}
	return d.ChunkSeriesSet.Err()
}

func (d *delModifierSeriesSet) Warnings() storage.Warnings {
	return d.ChunkSeriesSet.Warnings()
}

type delGenericSeriesIterator struct {
	chks chunks.Iterator

	err       error
	bufIter   *tsdb.DeletedIterator
	intervals tombstones.Intervals

	currDelIter chunkenc.Iterator
	currChkMeta chunks.Meta
	logDelete   func(intervals tombstones.Intervals)
	deleted     tombstones.Intervals
}

func NewDelGenericSeriesIterator(
	chks chunks.Iterator,
	intervals tombstones.Intervals,
	logDelete func(intervals tombstones.Intervals),
) *delGenericSeriesIterator {
	return &delGenericSeriesIterator{
		chks:      chks,
		bufIter:   &tsdb.DeletedIterator{},
		intervals: intervals,
		logDelete: logDelete,
	}
}
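
// Illustrative sketch, not part of the original file: wrapping a series' chunk
// iterator so the given intervals are skipped, then consuming it chunk by
// chunk; ToSeriesIterator would instead yield the remaining samples one by one.
// This mirrors how delModifierSeriesSet.Next uses the iterator above.
func exampleDelIterator(s storage.ChunkSeries, toDelete tombstones.Intervals, log ChangeLogger) chunks.Iterator {
	return NewDelGenericSeriesIterator(s.Iterator(nil), toDelete, func(deleted tombstones.Intervals) {
		log.DeleteSeries(s.Labels(), deleted)
	}).ToChunkSeriesIterator()
}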

func (d *delGenericSeriesIterator) next() (ok bool) {
	if d.err != nil {
		return false
	}

	for d.chks.Next() {
		d.currChkMeta = d.chks.At()

		if chk := (tombstones.Interval{Mint: d.currChkMeta.MinTime, Maxt: d.currChkMeta.MaxTime}); chk.IsSubrange(d.intervals) {
			d.deleted = d.deleted.Add(chk)
			continue
		}
		d.bufIter.Intervals = d.bufIter.Intervals[:0]
		for _, interval := range d.intervals {
			if d.currChkMeta.OverlapsClosedInterval(interval.Mint, interval.Maxt) {
				d.bufIter.Intervals = d.bufIter.Intervals.Add(interval)
			}
		}
		if len(d.bufIter.Intervals) == 0 {
			d.currDelIter = nil
			return true
		}

		for _, del := range intersection(tombstones.Interval{Mint: d.currChkMeta.MinTime, Maxt: d.currChkMeta.MaxTime}, d.bufIter.Intervals) {
			d.deleted = d.deleted.Add(del)
		}

		// We don't want the full chunk, take just a part of it.
		d.bufIter.Iter = d.currChkMeta.Chunk.Iterator(nil)
		d.currDelIter = d.bufIter
		return true
	}
	if len(d.deleted) > 0 {
		d.logDelete(d.deleted)
	}
	return false
}

func (d *delGenericSeriesIterator) Err() error {
	if d.err != nil {
		return d.err
	}
	return d.chks.Err()
}

func (d *delGenericSeriesIterator) ToSeriesIterator() chunkenc.Iterator {
	return &delSeriesIterator{delGenericSeriesIterator: d}
}

func (d *delGenericSeriesIterator) ToChunkSeriesIterator() chunks.Iterator {
	return &delChunkSeriesIterator{delGenericSeriesIterator: d}
}

// delSeriesIterator allows iterating over samples of a single series.
type delSeriesIterator struct {
	*delGenericSeriesIterator

	curr chunkenc.Iterator
}

func (p *delSeriesIterator) Next() chunkenc.ValueType {
	if p.curr != nil {
		if valueType := p.curr.Next(); valueType != chunkenc.ValNone {
			return valueType
		}
	}

	for p.next() {
		if p.currDelIter != nil {
			p.curr = p.currDelIter
		} else {
			p.curr = p.currChkMeta.Chunk.Iterator(nil)
		}
		if valueType := p.curr.Next(); valueType != chunkenc.ValNone {
			return valueType
		}
	}
	return chunkenc.ValNone
}

func (p *delSeriesIterator) Seek(t int64) chunkenc.ValueType {
	if p.curr != nil {
		if valueType := p.curr.Seek(t); valueType != chunkenc.ValNone {
			return valueType
		}
	}
	for p.Next() != chunkenc.ValNone {
		if valueType := p.curr.Seek(t); valueType != chunkenc.ValNone {
			return valueType
		}
	}
	return chunkenc.ValNone
}

func (p *delSeriesIterator) At() (int64, float64) { return p.curr.At() }

// TODO(rabenhorst): Needs to be implemented for native histogram support.
func (p *delSeriesIterator) AtHistogram() (int64, *histogram.Histogram) {
	panic("not implemented")
}

func (p *delSeriesIterator) AtFloatHistogram() (int64, *histogram.FloatHistogram) {
	panic("not implemented")
}

func (p *delSeriesIterator) AtT() int64 {
	t, _ := p.curr.At()
	return t
}

func (p *delSeriesIterator) Err() error {
	if err := p.delGenericSeriesIterator.Err(); err != nil {
		return err
	}
	if p.curr != nil {
		return p.curr.Err()
	}
	return nil
}

type delChunkSeriesIterator struct {
	*delGenericSeriesIterator

	curr chunks.Meta
}

func (p *delChunkSeriesIterator) Next() bool {
	if !p.next() {
		return false
	}

	p.curr = p.currChkMeta
	if p.currDelIter == nil {
		return true
	}

	// Re-encode the chunk if an iterator is provided. This means that it has some samples to be deleted or the chunk is opened.
	newChunk := chunkenc.NewXORChunk()
	app, err := newChunk.Appender()
	if err != nil {
		p.err = err
		return false
	}

	if p.currDelIter.Next() == chunkenc.ValNone {
		if err := p.currDelIter.Err(); err != nil {
			p.err = errors.Wrap(err, "iterate chunk while re-encoding")
			return false
		}

		// Empty chunk; this should not happen, as we assume full deletions are filtered out before this iterator.
		p.err = errors.New("populateWithDelChunkSeriesIterator: unexpected empty chunk found while rewriting chunk")
		return false
	}

	t, v := p.currDelIter.At()
	p.curr.MinTime = t
	app.Append(t, v)

	for p.currDelIter.Next() != chunkenc.ValNone {
		t, v = p.currDelIter.At()
		app.Append(t, v)
	}
	if err := p.currDelIter.Err(); err != nil {
		p.err = errors.Wrap(err, "iterate chunk while re-encoding")
		return false
	}

	p.curr.Chunk = newChunk
	p.curr.MaxTime = t
	return true
}

func (p *delChunkSeriesIterator) At() chunks.Meta { return p.curr }

type RelabelModifier struct {
	relabels []*relabel.Config
}

func WithRelabelModifier(relabels ...*relabel.Config) *RelabelModifier {
	return &RelabelModifier{relabels: relabels}
}
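
// Illustrative sketch, not part of the original file: a RelabelModifier that
// drops a hypothetical "replica" label from every series. Series whose label
// sets become identical after relabelling are merged by Modify below; a series
// relabelled to an empty label set is deleted entirely.
func exampleRelabelModifier() *RelabelModifier {
	return WithRelabelModifier(&relabel.Config{
		Action: relabel.LabelDrop,
		Regex:  relabel.MustNewRegexp("replica"),
	})
}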

func (d *RelabelModifier) Modify(_ index.StringIter, set storage.ChunkSeriesSet, log ChangeLogger, p ProgressLogger) (index.StringIter, storage.ChunkSeriesSet) {
	// Gather symbols.
	symbols := make(map[string]struct{})
	chunkSeriesMap := make(map[string]*mergeChunkSeries)

	for set.Next() {
		s := set.At()
		lbls := s.Labels()
		chksIter := s.Iterator(nil)

		// The labels have to be copied because `relabel.Process` now overwrites the original
		// labels in the same memory. This has been the case since Prometheus v2.39.0.
		if processedLabels, _ := relabel.Process(lbls.Copy(), d.relabels...); len(processedLabels) == 0 {
			// Special case: Delete the whole series if no labels are present.
			var (
				minT int64 = math.MaxInt64
				maxT int64 = math.MinInt64
			)
			for chksIter.Next() {
				c := chksIter.At()
				if c.MinTime < minT {
					minT = c.MinTime
				}
				if c.MaxTime > maxT {
					maxT = c.MaxTime
				}
			}

			if err := chksIter.Err(); err != nil {
				return errorOnlyStringIter{err: err}, nil
			}

			var deleted tombstones.Intervals
			// If minT is set, then there is at least one chunk.
			if minT != math.MaxInt64 {
				deleted = deleted.Add(tombstones.Interval{Mint: minT, Maxt: maxT})
			}
			log.DeleteSeries(lbls, deleted)
			p.SeriesProcessed()
		} else {
			for _, lb := range processedLabels {
				symbols[lb.Name] = struct{}{}
				symbols[lb.Value] = struct{}{}
			}

			lbStr := processedLabels.String()
			if _, ok := chunkSeriesMap[lbStr]; !ok {
				chunkSeriesMap[lbStr] = newChunkSeriesBuilder(processedLabels)
			}
			cs := chunkSeriesMap[lbStr]

			// We have to iterate over the chunks and populate them here, as
			// lazyPopulateChunkSeriesSet reuses chunks and the previous chunks
			// would be overwritten on the next set.Next() call.
			for chksIter.Next() {
				c := chksIter.At()
				cs.addIter(c.Chunk.Iterator(nil))
			}
			if err := chksIter.Err(); err != nil {
				return errorOnlyStringIter{err}, nil
			}

			if !labels.Equal(lbls, processedLabels) {
				log.ModifySeries(lbls, processedLabels)
			}
		}
	}

	symbolsSlice := make([]string, 0, len(symbols))
	for s := range symbols {
		symbolsSlice = append(symbolsSlice, s)
	}
	sort.Strings(symbolsSlice)

	chunkSeriesSet := make([]storage.ChunkSeries, 0, len(chunkSeriesMap))
	for _, chunkSeries := range chunkSeriesMap {
		chunkSeriesSet = append(chunkSeriesSet, chunkSeries)
	}
	sort.Slice(chunkSeriesSet, func(i, j int) bool {
		return labels.Compare(chunkSeriesSet[i].Labels(), chunkSeriesSet[j].Labels()) < 0
	})
	return index.NewStringListIter(symbolsSlice), newListChunkSeriesSet(chunkSeriesSet...)
}

// mergeChunkSeries builds a storage.ChunkSeries from several chunkenc.Iterators.
type mergeChunkSeries struct {
	lset labels.Labels
	ss   []storage.Series
}

func newChunkSeriesBuilder(lset labels.Labels) *mergeChunkSeries {
	return &mergeChunkSeries{
		lset: lset,
		ss:   make([]storage.Series, 0),
	}
}

func (s *mergeChunkSeries) addIter(iter chunkenc.Iterator) {
	s.ss = append(s.ss, &storage.SeriesEntry{
		SampleIteratorFn: func(iterator chunkenc.Iterator) chunkenc.Iterator {
			return iter
		},
	})
}

func (s *mergeChunkSeries) Labels() labels.Labels {
	return s.lset
}

func (s *mergeChunkSeries) Iterator(iterator chunks.Iterator) chunks.Iterator {
	if len(s.ss) == 0 {
		return nil
	}
	if len(s.ss) == 1 {
		return storage.NewSeriesToChunkEncoder(s.ss[0]).Iterator(iterator)
	}

	return storage.NewSeriesToChunkEncoder(storage.ChainedSeriesMerge(s.ss...)).Iterator(iterator)
}
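
// Illustrative sketch, not part of the original file: merging two hypothetical
// sample iterators under a single label set, as RelabelModifier.Modify does
// when relabelling makes two series collide. The chained series are re-encoded
// into chunks on iteration.
func exampleMergeChunkSeries(lset labels.Labels, a, b chunkenc.Iterator) storage.ChunkSeries {
	cs := newChunkSeriesBuilder(lset)
	cs.addIter(a)
	cs.addIter(b)
	return cs
}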

type errorOnlyStringIter struct {
	err error
}

func (errorOnlyStringIter) Next() bool   { return false }
func (errorOnlyStringIter) At() string   { return "" }
func (s errorOnlyStringIter) Err() error { return s.err }

type listChunkSeriesSet struct {
	css []storage.ChunkSeries
	idx int
}

func newListChunkSeriesSet(css ...storage.ChunkSeries) storage.ChunkSeriesSet {
	return &listChunkSeriesSet{css: css, idx: -1}
}

func (s *listChunkSeriesSet) Next() bool {
	s.idx++
	return s.idx < len(s.css)
}

func (s *listChunkSeriesSet) At() storage.ChunkSeries    { return s.css[s.idx] }
func (s *listChunkSeriesSet) Err() error                 { return nil }
func (s *listChunkSeriesSet) Warnings() storage.Warnings { return nil }