github.com/thanos-io/thanos@v0.32.5/pkg/compactv2/modifiers.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package compactv2 5 6 import ( 7 "math" 8 "sort" 9 10 "github.com/pkg/errors" 11 "github.com/prometheus/prometheus/model/histogram" 12 "github.com/prometheus/prometheus/model/labels" 13 "github.com/prometheus/prometheus/model/relabel" 14 "github.com/prometheus/prometheus/storage" 15 "github.com/prometheus/prometheus/tsdb" 16 "github.com/prometheus/prometheus/tsdb/chunkenc" 17 "github.com/prometheus/prometheus/tsdb/chunks" 18 "github.com/prometheus/prometheus/tsdb/index" 19 "github.com/prometheus/prometheus/tsdb/tombstones" 20 21 "github.com/thanos-io/thanos/pkg/block/metadata" 22 ) 23 24 type Modifier interface { 25 Modify(sym index.StringIter, set storage.ChunkSeriesSet, log ChangeLogger, p ProgressLogger) (index.StringIter, storage.ChunkSeriesSet) 26 } 27 28 type DeletionModifier struct { 29 deletions []metadata.DeletionRequest 30 } 31 32 func WithDeletionModifier(deletions ...metadata.DeletionRequest) *DeletionModifier { 33 return &DeletionModifier{deletions: deletions} 34 } 35 36 func (d *DeletionModifier) Modify(sym index.StringIter, set storage.ChunkSeriesSet, log ChangeLogger, p ProgressLogger) (index.StringIter, storage.ChunkSeriesSet) { 37 // TODO(bwplotka): Modify symbols as well. Otherwise large string will be kept forever. 38 // This is however what Prometheus already does. It does not increase index size too much though. 39 // This needs a bit of work due to sorting and tracking required to rebuild them.pp 40 41 return sym, &delModifierSeriesSet{ 42 d: d, 43 44 ChunkSeriesSet: set, 45 log: log, 46 p: p, 47 } 48 } 49 50 type delModifierSeriesSet struct { 51 storage.ChunkSeriesSet 52 53 d *DeletionModifier 54 log ChangeLogger 55 p ProgressLogger 56 57 curr *storage.ChunkSeriesEntry 58 err error 59 } 60 61 func (d *delModifierSeriesSet) Next() bool { 62 SeriesLoop: 63 for d.ChunkSeriesSet.Next() { 64 s := d.ChunkSeriesSet.At() 65 lbls := s.Labels() 66 67 var intervals tombstones.Intervals 68 DeletionsLoop: 69 for _, deletions := range d.d.deletions { 70 for _, m := range deletions.Matchers { 71 v := lbls.Get(m.Name) 72 73 // Only if all matchers in the deletion request are matched can we proceed to deletion. 74 if v == "" || !m.Matches(v) { 75 continue DeletionsLoop 76 } 77 } 78 if len(deletions.Intervals) > 0 { 79 for _, in := range deletions.Intervals { 80 intervals = intervals.Add(in) 81 } 82 continue 83 } 84 85 // Special case: Delete whole series. 86 chksIter := s.Iterator(nil) 87 var chks []chunks.Meta 88 for chksIter.Next() { 89 chks = append(chks, chksIter.At()) 90 } 91 if d.err = chksIter.Err(); d.err != nil { 92 return false 93 } 94 95 var deleted tombstones.Intervals 96 if len(chks) > 0 { 97 deleted = deleted.Add(tombstones.Interval{Mint: chks[0].MinTime, Maxt: chks[len(chks)-1].MaxTime}) 98 } 99 d.log.DeleteSeries(lbls, deleted) 100 d.p.SeriesProcessed() 101 continue SeriesLoop 102 } 103 104 d.curr = &storage.ChunkSeriesEntry{ 105 Lset: lbls, 106 ChunkIteratorFn: func(it chunks.Iterator) chunks.Iterator { 107 return NewDelGenericSeriesIterator(s.Iterator(it), intervals, func(intervals tombstones.Intervals) { 108 d.log.DeleteSeries(lbls, intervals) 109 }).ToChunkSeriesIterator() 110 }, 111 } 112 return true 113 } 114 return false 115 } 116 117 // intersection returns intersection between interval and range of intervals. 118 func intersection(i tombstones.Interval, dranges tombstones.Intervals) tombstones.Intervals { 119 var ret tombstones.Intervals 120 for _, r := range dranges { 121 isLeftIn := r.Mint <= i.Maxt 122 isRightIn := i.Mint <= r.Maxt 123 if !isLeftIn || !isRightIn { 124 continue 125 } 126 intersection := tombstones.Interval{Mint: r.Mint, Maxt: r.Maxt} 127 if intersection.Mint < i.Mint { 128 intersection.Mint = i.Mint 129 } 130 if intersection.Maxt > i.Maxt { 131 intersection.Maxt = i.Maxt 132 } 133 ret = ret.Add(intersection) 134 } 135 return ret 136 } 137 138 func (d *delModifierSeriesSet) At() storage.ChunkSeries { 139 return d.curr 140 } 141 142 func (d *delModifierSeriesSet) Err() error { 143 if d.err != nil { 144 return d.err 145 } 146 return d.ChunkSeriesSet.Err() 147 } 148 149 func (d *delModifierSeriesSet) Warnings() storage.Warnings { 150 return d.ChunkSeriesSet.Warnings() 151 } 152 153 type delGenericSeriesIterator struct { 154 chks chunks.Iterator 155 156 err error 157 bufIter *tsdb.DeletedIterator 158 intervals tombstones.Intervals 159 160 currDelIter chunkenc.Iterator 161 currChkMeta chunks.Meta 162 logDelete func(intervals tombstones.Intervals) 163 deleted tombstones.Intervals 164 } 165 166 func NewDelGenericSeriesIterator( 167 chks chunks.Iterator, 168 intervals tombstones.Intervals, 169 logDelete func(intervals tombstones.Intervals), 170 ) *delGenericSeriesIterator { 171 return &delGenericSeriesIterator{ 172 chks: chks, 173 bufIter: &tsdb.DeletedIterator{}, 174 intervals: intervals, 175 logDelete: logDelete, 176 } 177 } 178 179 func (d *delGenericSeriesIterator) next() (ok bool) { 180 if d.err != nil { 181 return false 182 } 183 184 for d.chks.Next() { 185 d.currChkMeta = d.chks.At() 186 187 if chk := (tombstones.Interval{Mint: d.currChkMeta.MinTime, Maxt: d.currChkMeta.MaxTime}); chk.IsSubrange(d.intervals) { 188 d.deleted = d.deleted.Add(chk) 189 continue 190 } 191 d.bufIter.Intervals = d.bufIter.Intervals[:0] 192 for _, interval := range d.intervals { 193 if d.currChkMeta.OverlapsClosedInterval(interval.Mint, interval.Maxt) { 194 d.bufIter.Intervals = d.bufIter.Intervals.Add(interval) 195 } 196 } 197 if len(d.bufIter.Intervals) == 0 { 198 d.currDelIter = nil 199 return true 200 } 201 202 for _, del := range intersection(tombstones.Interval{Mint: d.currChkMeta.MinTime, Maxt: d.currChkMeta.MaxTime}, d.bufIter.Intervals) { 203 d.deleted = d.deleted.Add(del) 204 } 205 206 // We don't want full chunk, take just part of it. 207 d.bufIter.Iter = d.currChkMeta.Chunk.Iterator(nil) 208 d.currDelIter = d.bufIter 209 return true 210 } 211 if len(d.deleted) > 0 { 212 d.logDelete(d.deleted) 213 } 214 return false 215 } 216 217 func (d *delGenericSeriesIterator) Err() error { 218 if d.err != nil { 219 return d.err 220 } 221 return d.chks.Err() 222 } 223 224 func (d *delGenericSeriesIterator) ToSeriesIterator() chunkenc.Iterator { 225 return &delSeriesIterator{delGenericSeriesIterator: d} 226 } 227 func (d *delGenericSeriesIterator) ToChunkSeriesIterator() chunks.Iterator { 228 return &delChunkSeriesIterator{delGenericSeriesIterator: d} 229 } 230 231 // delSeriesIterator allows to iterate over samples for the single series. 232 type delSeriesIterator struct { 233 *delGenericSeriesIterator 234 235 curr chunkenc.Iterator 236 } 237 238 func (p *delSeriesIterator) Next() chunkenc.ValueType { 239 if p.curr == nil { 240 return chunkenc.ValNone 241 } 242 243 if valueType := p.curr.Next(); valueType != chunkenc.ValNone { 244 return valueType 245 } 246 247 for p.next() { 248 if p.currDelIter != nil { 249 p.curr = p.currDelIter 250 } else { 251 p.curr = p.currChkMeta.Chunk.Iterator(nil) 252 } 253 if valueType := p.curr.Next(); valueType != chunkenc.ValNone { 254 return valueType 255 } 256 } 257 return chunkenc.ValNone 258 } 259 260 func (p *delSeriesIterator) Seek(t int64) chunkenc.ValueType { 261 if p.curr == nil { 262 return chunkenc.ValNone 263 } 264 265 if valueType := p.curr.Seek(t); valueType != chunkenc.ValNone { 266 return valueType 267 } 268 for p.Next() != chunkenc.ValNone { 269 if valueType := p.curr.Seek(t); valueType != chunkenc.ValNone { 270 return valueType 271 } 272 } 273 return chunkenc.ValNone 274 } 275 276 func (p *delSeriesIterator) At() (int64, float64) { return p.curr.At() } 277 278 // TODO(rabenhorst): Needs to be implemented for native histogram support. 279 func (p *delSeriesIterator) AtHistogram() (int64, *histogram.Histogram) { 280 panic("not implemented") 281 } 282 283 func (p *delSeriesIterator) AtFloatHistogram() (int64, *histogram.FloatHistogram) { 284 panic("not implemented") 285 } 286 287 func (p *delSeriesIterator) AtT() int64 { 288 t, _ := p.curr.At() 289 return t 290 } 291 292 func (p *delSeriesIterator) Err() error { 293 if err := p.delGenericSeriesIterator.Err(); err != nil { 294 return err 295 } 296 if p.curr != nil { 297 return p.curr.Err() 298 } 299 return nil 300 } 301 302 type delChunkSeriesIterator struct { 303 *delGenericSeriesIterator 304 305 curr chunks.Meta 306 } 307 308 func (p *delChunkSeriesIterator) Next() bool { 309 if !p.next() { 310 return false 311 } 312 313 p.curr = p.currChkMeta 314 if p.currDelIter == nil { 315 return true 316 } 317 318 // Re-encode the chunk if iterator is provider. This means that it has some samples to be deleted or chunk is opened. 319 newChunk := chunkenc.NewXORChunk() 320 app, err := newChunk.Appender() 321 if err != nil { 322 p.err = err 323 return false 324 } 325 326 if p.currDelIter.Next() == chunkenc.ValNone { 327 if err := p.currDelIter.Err(); err != nil { 328 p.err = errors.Wrap(err, "iterate chunk while re-encoding") 329 return false 330 } 331 332 // Empty chunk, this should not happen, as we assume full deletions being filtered before this iterator. 333 p.err = errors.Wrap(err, "populateWithDelChunkSeriesIterator: unexpected empty chunk found while rewriting chunk") 334 return false 335 } 336 337 t, v := p.currDelIter.At() 338 p.curr.MinTime = t 339 app.Append(t, v) 340 341 for p.currDelIter.Next() != chunkenc.ValNone { 342 t, v = p.currDelIter.At() 343 app.Append(t, v) 344 } 345 if err := p.currDelIter.Err(); err != nil { 346 p.err = errors.Wrap(err, "iterate chunk while re-encoding") 347 return false 348 } 349 350 p.curr.Chunk = newChunk 351 p.curr.MaxTime = t 352 return true 353 } 354 355 func (p *delChunkSeriesIterator) At() chunks.Meta { return p.curr } 356 357 type RelabelModifier struct { 358 relabels []*relabel.Config 359 } 360 361 func WithRelabelModifier(relabels ...*relabel.Config) *RelabelModifier { 362 return &RelabelModifier{relabels: relabels} 363 } 364 365 func (d *RelabelModifier) Modify(_ index.StringIter, set storage.ChunkSeriesSet, log ChangeLogger, p ProgressLogger) (index.StringIter, storage.ChunkSeriesSet) { 366 // Gather symbols. 367 symbols := make(map[string]struct{}) 368 chunkSeriesMap := make(map[string]*mergeChunkSeries) 369 370 for set.Next() { 371 s := set.At() 372 lbls := s.Labels() 373 chksIter := s.Iterator(nil) 374 375 // The labels have to be copied because `relabel.Process` is now overwriting the original 376 // labels to same memory. This happens since Prometheus v2.39.0. 377 if processedLabels, _ := relabel.Process(lbls.Copy(), d.relabels...); len(processedLabels) == 0 { 378 // Special case: Delete whole series if no labels are present. 379 var ( 380 minT int64 = math.MaxInt64 381 maxT int64 = math.MinInt64 382 ) 383 for chksIter.Next() { 384 c := chksIter.At() 385 if c.MinTime < minT { 386 minT = c.MinTime 387 } 388 if c.MaxTime > maxT { 389 maxT = c.MaxTime 390 } 391 } 392 393 if err := chksIter.Err(); err != nil { 394 return errorOnlyStringIter{err: err}, nil 395 } 396 397 var deleted tombstones.Intervals 398 // If minTime is set then there is at least one chunk. 399 if minT != math.MaxInt64 { 400 deleted = deleted.Add(tombstones.Interval{Mint: minT, Maxt: maxT}) 401 } 402 log.DeleteSeries(lbls, deleted) 403 p.SeriesProcessed() 404 } else { 405 for _, lb := range processedLabels { 406 symbols[lb.Name] = struct{}{} 407 symbols[lb.Value] = struct{}{} 408 } 409 410 lbStr := processedLabels.String() 411 if _, ok := chunkSeriesMap[lbStr]; !ok { 412 chunkSeriesMap[lbStr] = newChunkSeriesBuilder(processedLabels) 413 } 414 cs := chunkSeriesMap[lbStr] 415 416 // We have to iterate over the chunks and populate them here as 417 // lazyPopulateChunkSeriesSet reuses chunks and previous chunks 418 // will be overwritten at set.Next() call. 419 for chksIter.Next() { 420 c := chksIter.At() 421 cs.addIter(c.Chunk.Iterator(nil)) 422 } 423 if err := chksIter.Err(); err != nil { 424 return errorOnlyStringIter{err}, nil 425 } 426 427 if !labels.Equal(lbls, processedLabels) { 428 log.ModifySeries(lbls, processedLabels) 429 } 430 } 431 } 432 433 symbolsSlice := make([]string, 0, len(symbols)) 434 for s := range symbols { 435 symbolsSlice = append(symbolsSlice, s) 436 } 437 sort.Strings(symbolsSlice) 438 439 chunkSeriesSet := make([]storage.ChunkSeries, 0, len(chunkSeriesMap)) 440 for _, chunkSeries := range chunkSeriesMap { 441 chunkSeriesSet = append(chunkSeriesSet, chunkSeries) 442 } 443 sort.Slice(chunkSeriesSet, func(i, j int) bool { 444 return labels.Compare(chunkSeriesSet[i].Labels(), chunkSeriesSet[j].Labels()) < 0 445 }) 446 return index.NewStringListIter(symbolsSlice), newListChunkSeriesSet(chunkSeriesSet...) 447 } 448 449 // mergeChunkSeries build storage.ChunkSeries from several chunkenc.Iterator. 450 type mergeChunkSeries struct { 451 lset labels.Labels 452 ss []storage.Series 453 } 454 455 func newChunkSeriesBuilder(lset labels.Labels) *mergeChunkSeries { 456 return &mergeChunkSeries{ 457 lset: lset, 458 ss: make([]storage.Series, 0), 459 } 460 } 461 462 func (s *mergeChunkSeries) addIter(iter chunkenc.Iterator) { 463 s.ss = append(s.ss, &storage.SeriesEntry{ 464 SampleIteratorFn: func(iterator chunkenc.Iterator) chunkenc.Iterator { 465 return iter 466 }, 467 }) 468 } 469 470 func (s *mergeChunkSeries) Labels() labels.Labels { 471 return s.lset 472 } 473 474 func (s *mergeChunkSeries) Iterator(iterator chunks.Iterator) chunks.Iterator { 475 if len(s.ss) == 0 { 476 return nil 477 } 478 if len(s.ss) == 1 { 479 return storage.NewSeriesToChunkEncoder(s.ss[0]).Iterator(iterator) 480 } 481 482 return storage.NewSeriesToChunkEncoder(storage.ChainedSeriesMerge(s.ss...)).Iterator(iterator) 483 } 484 485 type errorOnlyStringIter struct { 486 err error 487 } 488 489 func (errorOnlyStringIter) Next() bool { return false } 490 func (errorOnlyStringIter) At() string { return "" } 491 func (s errorOnlyStringIter) Err() error { return s.err } 492 493 type listChunkSeriesSet struct { 494 css []storage.ChunkSeries 495 idx int 496 } 497 498 func newListChunkSeriesSet(css ...storage.ChunkSeries) storage.ChunkSeriesSet { 499 return &listChunkSeriesSet{css: css, idx: -1} 500 } 501 502 func (s *listChunkSeriesSet) Next() bool { 503 s.idx++ 504 return s.idx < len(s.css) 505 } 506 507 func (s *listChunkSeriesSet) At() storage.ChunkSeries { return s.css[s.idx] } 508 func (s *listChunkSeriesSet) Err() error { return nil } 509 func (s *listChunkSeriesSet) Warnings() storage.Warnings { return nil }