github.com/thanos-io/thanos@v0.32.5/pkg/block/index.go

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package block
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"hash/crc32"
    10  	"math"
    11  	"math/rand"
    12  	"path/filepath"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/go-kit/log"
    18  	"github.com/go-kit/log/level"
    19  	"github.com/oklog/ulid"
    20  	"github.com/pkg/errors"
    21  	"github.com/prometheus/prometheus/model/labels"
    22  	"github.com/prometheus/prometheus/storage"
    23  	"github.com/prometheus/prometheus/tsdb"
    24  	"github.com/prometheus/prometheus/tsdb/chunks"
    25  	"github.com/prometheus/prometheus/tsdb/index"
    26  
    27  	"github.com/thanos-io/thanos/pkg/block/metadata"
    28  	"github.com/thanos-io/thanos/pkg/runutil"
    29  )
    30  
    31  // VerifyIndex does a full run over a block index and verifies that it fulfills the order invariants.
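        // A minimal usage sketch (illustrative; logger, bdir and meta are assumed to be in scope,
        // with bdir pointing at a block directory that has already been downloaded):
        //
        //	if err := VerifyIndex(logger, filepath.Join(bdir, IndexFilename), meta.MinTime, meta.MaxTime); err != nil {
        //		// The index violates an invariant; see GatherIndexHealthStats for the individual checks.
        //	}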
    32  func VerifyIndex(logger log.Logger, fn string, minTime, maxTime int64) error {
    33  	stats, err := GatherIndexHealthStats(logger, fn, minTime, maxTime)
    34  	if err != nil {
    35  		return err
    36  	}
    37  
    38  	return stats.AnyErr()
    39  }
    40  
    41  type HealthStats struct {
    42  	// TotalSeries represents total number of series in block.
    43  	TotalSeries int64
    44  	// OutOfOrderSeries represents number of series that have out of order chunks.
    45  	OutOfOrderSeries int
    46  
    47  	// OutOfOrderChunks represents number of chunks that are out of order (older time range is after younger one).
    48  	OutOfOrderChunks int
    49  	// DuplicatedChunks represents number of chunks with same time ranges within same series, potential duplicates.
    50  	DuplicatedChunks int
    51  	// OutsideChunks represents number of all chunks that are before or after time range specified in block meta.
    52  	OutsideChunks int
    53  	// CompleteOutsideChunks is the subset of OutsideChunks that will never be accessed. They are completely outside the time range specified in the block meta.
    54  	CompleteOutsideChunks int
    55  	// Issue347OutsideChunks represents the subset of OutsideChunks caused by https://github.com/prometheus/tsdb/issues/347
    56  	// and is something that Thanos handles.
    57  	//
    58  	// Specifically we mean here chunks with minTime == block.MaxTime and maxTime > block.MaxTime. These
    59  	// are counted separately. These chunks are safe to delete, since they are duplicated across 2 blocks.
    60  	Issue347OutsideChunks int
    61  	// OutOfOrderLabels represents the number of postings that contained out
    62  	// of order labels, a bug present in Prometheus 2.8.0 and below.
    63  	OutOfOrderLabels int
    64  
    65  	// Debug Statistics.
    66  	SeriesMinLifeDuration time.Duration
    67  	SeriesAvgLifeDuration time.Duration
    68  	SeriesMaxLifeDuration time.Duration
    69  
    70  	SeriesMinLifeDurationWithoutSingleSampleSeries time.Duration
    71  	SeriesAvgLifeDurationWithoutSingleSampleSeries time.Duration
    72  	SeriesMaxLifeDurationWithoutSingleSampleSeries time.Duration
    73  
    74  	SeriesMinChunks int64
    75  	SeriesAvgChunks int64
    76  	SeriesMaxChunks int64
    77  
    78  	TotalChunks int64
    79  
    80  	ChunkMinDuration time.Duration
    81  	ChunkAvgDuration time.Duration
    82  	ChunkMaxDuration time.Duration
    83  
    84  	ChunkMinSize int64
    85  	ChunkAvgSize int64
    86  	ChunkMaxSize int64
    87  
    88  	SeriesMinSize int64
    89  	SeriesAvgSize int64
    90  	SeriesMaxSize int64
    91  
    92  	SingleSampleSeries int64
    93  	SingleSampleChunks int64
    94  
    95  	LabelNamesCount        int64
    96  	MetricLabelValuesCount int64
    97  }
    98  
    99  // OutOfOrderLabelsErr returns an error if the HealthStats object indicates
   100  // postings with out of order labels.  This is corrected by Prometheus Issue
   101  // #5372 and affects Prometheus versions 2.8.0 and below.
   102  func (i HealthStats) OutOfOrderLabelsErr() error {
   103  	if i.OutOfOrderLabels > 0 {
   104  		return errors.Errorf("index contains %d postings with out of order labels",
   105  			i.OutOfOrderLabels)
   106  	}
   107  	return nil
   108  }
   109  
   110  // Issue347OutsideChunksErr returns an error if the stats indicate the issue347 block issue, which is repaired explicitly before compaction (on the planned blocks).
   111  func (i HealthStats) Issue347OutsideChunksErr() error {
   112  	if i.Issue347OutsideChunks > 0 {
   113  		return errors.Errorf("found %d chunks outside the block time range introduced by https://github.com/prometheus/tsdb/issues/347", i.Issue347OutsideChunks)
   114  	}
   115  	return nil
   116  }
   117  
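        // OutOfOrderChunksErr returns an error if the stats indicate chunks that overlap or are out of
        // order within their series.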
   118  func (i HealthStats) OutOfOrderChunksErr() error {
   119  	if i.OutOfOrderChunks > 0 {
   120  		return errors.Errorf(
   121  			"%d/%d series have an average of %.3f out-of-order chunks: "+
   122  				"%.3f of these are exact duplicates (in terms of data and time range)",
   123  			i.OutOfOrderSeries,
   124  			i.TotalSeries,
   125  			float64(i.OutOfOrderChunks)/float64(i.OutOfOrderSeries),
   126  			float64(i.DuplicatedChunks)/float64(i.OutOfOrderChunks),
   127  		)
   128  	}
   129  	return nil
   130  }
   131  
   132  // CriticalErr returns an error if the stats indicate a critical block issue that might be solved only by a manual repair procedure.
   133  func (i HealthStats) CriticalErr() error {
   134  	var errMsg []string
   135  
   136  	n := i.OutsideChunks - (i.CompleteOutsideChunks + i.Issue347OutsideChunks)
   137  	if n > 0 {
   138  		errMsg = append(errMsg, fmt.Sprintf("found %d chunks non-completely outside the block time range", n))
   139  	}
   140  
   141  	if i.CompleteOutsideChunks > 0 {
   142  		errMsg = append(errMsg, fmt.Sprintf("found %d chunks completely outside the block time range", i.CompleteOutsideChunks))
   143  	}
   144  
   145  	if len(errMsg) > 0 {
   146  		return errors.New(strings.Join(errMsg, ", "))
   147  	}
   148  
   149  	return nil
   150  }
   151  
   152  // AnyErr returns an error if the stats indicate any block issue.
   153  func (i HealthStats) AnyErr() error {
   154  	var errMsg []string
   155  
   156  	if err := i.CriticalErr(); err != nil {
   157  		errMsg = append(errMsg, err.Error())
   158  	}
   159  
   160  	if err := i.Issue347OutsideChunksErr(); err != nil {
   161  		errMsg = append(errMsg, err.Error())
   162  	}
   163  
   164  	if err := i.OutOfOrderLabelsErr(); err != nil {
   165  		errMsg = append(errMsg, err.Error())
   166  	}
   167  
   168  	if err := i.OutOfOrderChunksErr(); err != nil {
   169  		errMsg = append(errMsg, err.Error())
   170  	}
   171  
   172  	if len(errMsg) > 0 {
   173  		return errors.New(strings.Join(errMsg, ", "))
   174  	}
   175  
   176  	return nil
   177  }
   178  
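        // minMaxSumInt64 tracks the minimum, maximum, sum and count of observed int64 values.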
   179  type minMaxSumInt64 struct {
   180  	sum int64
   181  	min int64
   182  	max int64
   183  
   184  	cnt int64
   185  }
   186  
   187  func newMinMaxSumInt64() minMaxSumInt64 {
   188  	return minMaxSumInt64{
   189  		min: math.MaxInt64,
   190  		max: math.MinInt64,
   191  	}
   192  }
   193  
   194  func (n *minMaxSumInt64) Add(v int64) {
   195  	n.cnt++
   196  	n.sum += v
   197  	if n.min > v {
   198  		n.min = v
   199  	}
   200  	if n.max < v {
   201  		n.max = v
   202  	}
   203  }
   204  
   205  func (n *minMaxSumInt64) Avg() int64 {
   206  	if n.cnt == 0 {
   207  		return 0
   208  	}
   209  	return n.sum / n.cnt
   210  }
   211  
   212  // GatherIndexHealthStats returns useful counters as well as outsider chunks (chunks outside of the block time range) that
   213  // help to assess index health.
   214  // It considers https://github.com/prometheus/tsdb/issues/347 as something that Thanos can handle.
   215  // See HealthStats.Issue347OutsideChunks for details.
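        // A minimal usage sketch (illustrative; logger, bdir and meta are assumed to be in scope):
        //
        //	stats, err := GatherIndexHealthStats(logger, filepath.Join(bdir, IndexFilename), meta.MinTime, meta.MaxTime)
        //	if err != nil {
        //		return err
        //	}
        //	if err := stats.AnyErr(); err != nil {
        //		level.Warn(logger).Log("msg", "unhealthy block index", "err", err)
        //	}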
   216  func GatherIndexHealthStats(logger log.Logger, fn string, minTime, maxTime int64) (stats HealthStats, err error) {
   217  	r, err := index.NewFileReader(fn)
   218  	if err != nil {
   219  		return stats, errors.Wrap(err, "open index file")
   220  	}
   221  	defer runutil.CloseWithErrCapture(&err, r, "gather index issue file reader")
   222  
   223  	p, err := r.Postings(index.AllPostingsKey())
   224  	if err != nil {
   225  		return stats, errors.Wrap(err, "get all postings")
   226  	}
   227  	var (
   228  		lastLset labels.Labels
   229  		lset     labels.Labels
   230  		builder  labels.ScratchBuilder
   231  		chks     []chunks.Meta
   232  
   233  		seriesLifeDuration                          = newMinMaxSumInt64()
   234  		seriesLifeDurationWithoutSingleSampleSeries = newMinMaxSumInt64()
   235  		seriesChunks                                = newMinMaxSumInt64()
   236  		chunkDuration                               = newMinMaxSumInt64()
   237  		chunkSize                                   = newMinMaxSumInt64()
   238  		seriesSize                                  = newMinMaxSumInt64()
   239  	)
   240  
   241  	lnames, err := r.LabelNames()
   242  	if err != nil {
   243  		return stats, errors.Wrap(err, "label names")
   244  	}
   245  	stats.LabelNamesCount = int64(len(lnames))
   246  
   247  	lvals, err := r.LabelValues("__name__")
   248  	if err != nil {
   249  		return stats, errors.Wrap(err, "metric label values")
   250  	}
   251  	stats.MetricLabelValuesCount = int64(len(lvals))
   252  
   253  	// As of version two all series entries are 16 byte padded. All references
   254  	// we get have to account for that to get the correct offset.
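        	// For example, with index version 2 a difference of 10 between two consecutive series
        	// references corresponds to roughly 10*16=160 bytes between the series entries, which is
        	// what the seriesSize approximation below relies on.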
   255  	offsetMultiplier := 1
   256  	version := r.Version()
   257  	if version >= 2 {
   258  		offsetMultiplier = 16
   259  	}
   260  
   261  	// Per series.
   262  	var prevId storage.SeriesRef
   263  	for p.Next() {
   264  		lastLset = append(lastLset[:0], lset...)
   265  
   266  		id := p.At()
   267  		if prevId != 0 {
   268  			// Approximate size.
   269  			seriesSize.Add(int64(id-prevId) * int64(offsetMultiplier))
   270  		}
   271  		prevId = id
   272  		stats.TotalSeries++
   273  
   274  		if err := r.Series(id, &builder, &chks); err != nil {
   275  			return stats, errors.Wrap(err, "read series")
   276  		}
   277  		lset = builder.Labels()
   278  		if len(lset) == 0 {
   279  			return stats, errors.Errorf("empty label set detected for series %d", id)
   280  		}
   281  		if lastLset != nil && labels.Compare(lastLset, lset) >= 0 {
   282  			return stats, errors.Errorf("series %v out of order; previous %v", lset, lastLset)
   283  		}
   284  		l0 := lset[0]
   285  		for _, l := range lset[1:] {
   286  			if l.Name < l0.Name {
   287  				stats.OutOfOrderLabels++
   288  				level.Warn(logger).Log("msg",
   289  					"out-of-order label set: known bug in Prometheus 2.8.0 and below",
   290  					"labelset", lset.String(),
   291  					"series", fmt.Sprintf("%d", id),
   292  				)
   293  			}
   294  			l0 = l
   295  		}
   296  		if len(chks) == 0 {
   297  			return stats, errors.Errorf("empty chunks for series %d", id)
   298  		}
   299  
   300  		ooo := 0
   301  		seriesLifeTimeMs := int64(0)
   302  		// Per chunk in series.
   303  		for i, c := range chks {
   304  			stats.TotalChunks++
   305  
   306  			chkDur := c.MaxTime - c.MinTime
   307  			seriesLifeTimeMs += chkDur
   308  			chunkDuration.Add(chkDur)
   309  			if chkDur == 0 {
   310  				stats.SingleSampleChunks++
   311  			}
   312  
   313  			// Approximate size.
   314  			if i < len(chks)-2 {
   315  				sgmIndex, chkStart := chunks.BlockChunkRef(c.Ref).Unpack()
   316  				sgmIndex2, chkStart2 := chunks.BlockChunkRef(chks[i+1].Ref).Unpack()
   317  				// Skip the case where two chunks are spread into 2 files.
   318  				if sgmIndex == sgmIndex2 {
   319  					chunkSize.Add(int64(chkStart2 - chkStart))
   320  				}
   321  			}
   322  
   323  			// Chunk vs the block ranges.
   324  			if c.MinTime < minTime || c.MaxTime > maxTime {
   325  				stats.OutsideChunks++
   326  				if c.MinTime > maxTime || c.MaxTime < minTime {
   327  					stats.CompleteOutsideChunks++
   328  				} else if c.MinTime == maxTime {
   329  					stats.Issue347OutsideChunks++
   330  				}
   331  			}
   332  
   333  			if i == 0 {
   334  				continue
   335  			}
   336  
   337  			c0 := chks[i-1]
   338  
   339  			// Chunk order within block.
   340  			if c.MinTime > c0.MaxTime {
   341  				continue
   342  			}
   343  
   344  			if c.MinTime == c0.MinTime && c.MaxTime == c0.MaxTime {
   345  			// TODO(bplotka): Calc and check the checksum from the chunks themselves.
   346  			// The chunks can overlap 1:1 in time, but still not contain the same data.
   347  			// We assume same data for simplicity, but it can be a symptom of an error.
   348  				stats.DuplicatedChunks++
   349  				continue
   350  			}
   351  			// Chunks partly overlaps or out of order.
   352  			ooo++
   353  		}
   354  		if ooo > 0 {
   355  			stats.OutOfOrderSeries++
   356  			stats.OutOfOrderChunks += ooo
   357  			level.Debug(logger).Log("msg", "found out of order series", "labels", lset)
   358  		}
   359  
   360  		seriesChunks.Add(int64(len(chks)))
   361  		seriesLifeDuration.Add(seriesLifeTimeMs)
   362  
   363  		if seriesLifeTimeMs == 0 {
   364  			stats.SingleSampleSeries++
   365  		} else {
   366  			seriesLifeDurationWithoutSingleSampleSeries.Add(seriesLifeTimeMs)
   367  		}
   368  	}
   369  	if p.Err() != nil {
   370  		return stats, errors.Wrap(p.Err(), "walk postings")
   371  	}
   372  
   373  	stats.SeriesMaxLifeDuration = time.Duration(seriesLifeDuration.max) * time.Millisecond
   374  	stats.SeriesAvgLifeDuration = time.Duration(seriesLifeDuration.Avg()) * time.Millisecond
   375  	stats.SeriesMinLifeDuration = time.Duration(seriesLifeDuration.min) * time.Millisecond
   376  
   377  	stats.SeriesMaxLifeDurationWithoutSingleSampleSeries = time.Duration(seriesLifeDurationWithoutSingleSampleSeries.max) * time.Millisecond
   378  	stats.SeriesAvgLifeDurationWithoutSingleSampleSeries = time.Duration(seriesLifeDurationWithoutSingleSampleSeries.Avg()) * time.Millisecond
   379  	stats.SeriesMinLifeDurationWithoutSingleSampleSeries = time.Duration(seriesLifeDurationWithoutSingleSampleSeries.min) * time.Millisecond
   380  
   381  	stats.SeriesMaxChunks = seriesChunks.max
   382  	stats.SeriesAvgChunks = seriesChunks.Avg()
   383  	stats.SeriesMinChunks = seriesChunks.min
   384  
   385  	stats.ChunkMaxSize = chunkSize.max
   386  	stats.ChunkAvgSize = chunkSize.Avg()
   387  	stats.ChunkMinSize = chunkSize.min
   388  
   389  	stats.SeriesMaxSize = seriesSize.max
   390  	stats.SeriesAvgSize = seriesSize.Avg()
   391  	stats.SeriesMinSize = seriesSize.min
   392  
   393  	stats.ChunkMaxDuration = time.Duration(chunkDuration.max) * time.Millisecond
   394  	stats.ChunkAvgDuration = time.Duration(chunkDuration.Avg()) * time.Millisecond
   395  	stats.ChunkMinDuration = time.Duration(chunkDuration.min) * time.Millisecond
   396  	return stats, nil
   397  }
   398  
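        // ignoreFnType decides whether the current chunk should be dropped during Repair. It receives
        // the block time range, the previously kept chunk (nil if none) and the current chunk.
        // A sketch of a custom filter, assuming one wanted to drop empty chunks (hypothetical, not
        // part of this package):
        //
        //	var ignoreEmptyChunk ignoreFnType = func(mint, maxt int64, prev, curr *chunks.Meta) (bool, error) {
        //		return curr.Chunk != nil && curr.Chunk.NumSamples() == 0, nil
        //	}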
   399  type ignoreFnType func(mint, maxt int64, prev *chunks.Meta, curr *chunks.Meta) (bool, error)
   400  
   401  // Repair opens the block with the given id in dir and creates a new one with fixed data.
   402  // It:
   403  // - removes out-of-order duplicates,
   404  // - removes all "complete" outsiders (they will not be accessed anyway),
   405  // - removes all near "complete" outside chunks introduced by https://github.com/prometheus/tsdb/issues/347.
   406  // Fixable inconsistencies are resolved in the new block.
   407  // TODO(bplotka): https://github.com/thanos-io/thanos/issues/378.
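        // A minimal usage sketch (illustrative; dir and id are assumed to point at an already
        // downloaded block, and the source type here is just an example):
        //
        //	resid, err := Repair(logger, dir, id, metadata.BucketRepairSource,
        //		IgnoreCompleteOutsideChunk, IgnoreDuplicateOutsideChunk, IgnoreIssue347OutsideChunk)
        //	if err != nil {
        //		return err
        //	}
        //	// The repaired block is written to filepath.Join(dir, resid.String()).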
   408  func Repair(logger log.Logger, dir string, id ulid.ULID, source metadata.SourceType, ignoreChkFns ...ignoreFnType) (resid ulid.ULID, err error) {
   409  	if len(ignoreChkFns) == 0 {
   410  		return resid, errors.New("no ignore chunk function specified")
   411  	}
   412  
   413  	bdir := filepath.Join(dir, id.String())
   414  	entropy := rand.New(rand.NewSource(time.Now().UnixNano()))
   415  	resid = ulid.MustNew(ulid.Now(), entropy)
   416  
   417  	meta, err := metadata.ReadFromDir(bdir)
   418  	if err != nil {
   419  		return resid, errors.Wrap(err, "read meta file")
   420  	}
   421  	if meta.Thanos.Downsample.Resolution > 0 {
   422  		return resid, errors.New("cannot repair downsampled block")
   423  	}
   424  
   425  	b, err := tsdb.OpenBlock(logger, bdir, nil)
   426  	if err != nil {
   427  		return resid, errors.Wrap(err, "open block")
   428  	}
   429  	defer runutil.CloseWithErrCapture(&err, b, "repair block reader")
   430  
   431  	indexr, err := b.Index()
   432  	if err != nil {
   433  		return resid, errors.Wrap(err, "open index")
   434  	}
   435  	defer runutil.CloseWithErrCapture(&err, indexr, "repair index reader")
   436  
   437  	chunkr, err := b.Chunks()
   438  	if err != nil {
   439  		return resid, errors.Wrap(err, "open chunks")
   440  	}
   441  	defer runutil.CloseWithErrCapture(&err, chunkr, "repair chunk reader")
   442  
   443  	resdir := filepath.Join(dir, resid.String())
   444  
   445  	chunkw, err := chunks.NewWriter(filepath.Join(resdir, ChunksDirname))
   446  	if err != nil {
   447  		return resid, errors.Wrap(err, "open chunk writer")
   448  	}
   449  	defer runutil.CloseWithErrCapture(&err, chunkw, "repair chunk writer")
   450  
   451  	indexw, err := index.NewWriter(context.TODO(), filepath.Join(resdir, IndexFilename))
   452  	if err != nil {
   453  		return resid, errors.Wrap(err, "open index writer")
   454  	}
   455  	defer runutil.CloseWithErrCapture(&err, indexw, "repair index writer")
   456  
   457  	// TODO(fabxc): adapt so we properly handle the version once we update to an upstream
   458  	// that has multiple.
   459  	resmeta := *meta
   460  	resmeta.ULID = resid
   461  	resmeta.Stats = tsdb.BlockStats{} // Reset stats.
   462  	resmeta.Thanos.Source = source    // Update source.
   463  
   464  	if err := rewrite(logger, indexr, chunkr, indexw, chunkw, &resmeta, ignoreChkFns); err != nil {
   465  		return resid, errors.Wrap(err, "rewrite block")
   466  	}
   467  	resmeta.Thanos.SegmentFiles = GetSegmentFiles(resdir)
   468  	if err := resmeta.WriteToDir(logger, resdir); err != nil {
   469  		return resid, err
   470  	}
   471  	// TSDB may rewrite metadata in bdir.
   472  	// TODO: This is not needed in newer TSDB code. See https://github.com/prometheus/tsdb/pull/637.
   473  	if err := meta.WriteToDir(logger, bdir); err != nil {
   474  		return resid, err
   475  	}
   476  	return resid, nil
   477  }
   478  
   479  var castagnoli = crc32.MakeTable(crc32.Castagnoli)
   480  
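        // IgnoreCompleteOutsideChunk is an ignoreFnType that drops chunks lying entirely outside the
        // [mint, maxt] block time range.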
   481  func IgnoreCompleteOutsideChunk(mint, maxt int64, _, curr *chunks.Meta) (bool, error) {
   482  	if curr.MinTime > maxt || curr.MaxTime < mint {
   483  		// "Complete" outsider. Ignore.
   484  		return true, nil
   485  	}
   486  	return false, nil
   487  }
   488  
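        // IgnoreIssue347OutsideChunk is an ignoreFnType that drops chunks starting exactly at the block's
        // maxt, i.e. the "near" outsiders introduced by https://github.com/prometheus/tsdb/issues/347.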
   489  func IgnoreIssue347OutsideChunk(_, maxt int64, _, curr *chunks.Meta) (bool, error) {
   490  	if curr.MinTime == maxt {
   491  		// "Near" outsider from issue https://github.com/prometheus/tsdb/issues/347. Ignore.
   492  		return true, nil
   493  	}
   494  	return false, nil
   495  }
   496  
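        // IgnoreDuplicateOutsideChunk is an ignoreFnType that drops the current chunk when it covers
        // exactly the same time range as the previously kept chunk and carries identical data (compared
        // via CRC32-Castagnoli checksums); overlapping chunks that are not exact copies are errors.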
   497  func IgnoreDuplicateOutsideChunk(_, _ int64, last, curr *chunks.Meta) (bool, error) {
   498  	if last == nil {
   499  		return false, nil
   500  	}
   501  
   502  	if curr.MinTime > last.MaxTime {
   503  		return false, nil
   504  	}
   505  
   506  	// Verify that the overlapping chunks are exact copies so we can safely discard
   507  	// the current one.
   508  	if curr.MinTime != last.MinTime || curr.MaxTime != last.MaxTime {
   509  		return false, errors.Errorf("non-sequential chunks not equal: [%d, %d] and [%d, %d]",
   510  			last.MinTime, last.MaxTime, curr.MinTime, curr.MaxTime)
   511  	}
   512  	ca := crc32.Checksum(last.Chunk.Bytes(), castagnoli)
   513  	cb := crc32.Checksum(curr.Chunk.Bytes(), castagnoli)
   514  
   515  	if ca != cb {
   516  		return false, errors.Errorf("non-sequential chunks not equal: %x and %x", ca, cb)
   517  	}
   518  
   519  	return true, nil
   520  }
   521  
   522  // sanitizeChunkSequence ensures order of the input chunks and drops any duplicates.
   523  // It errors if the sequence contains non-dedupable overlaps.
   524  func sanitizeChunkSequence(chks []chunks.Meta, mint, maxt int64, ignoreChkFns []ignoreFnType) ([]chunks.Meta, error) {
   525  	if len(chks) == 0 {
   526  		return nil, nil
   527  	}
   528  	// First, ensure that chunks are ordered by their start time.
   529  	sort.Slice(chks, func(i, j int) bool {
   530  		return chks[i].MinTime < chks[j].MinTime
   531  	})
   532  
   533  	// Remove duplicates, complete outsiders and near outsiders.
   534  	repl := make([]chunks.Meta, 0, len(chks))
   535  	var last *chunks.Meta
   536  
   537  OUTER:
   538  	// This compares the current chunk to the chunk from the last iteration
   539  	// by pointer. If we used "i, c := range chks", the variable c would be a single
   540  	// variable whose address doesn't change through the entire loop; the current
   541  	// element of the chks slice is merely copied into it. We must take
   542  	// the address of the indexed slice element instead.
   543  	for i := range chks {
   544  		for _, ignoreChkFn := range ignoreChkFns {
   545  			ignore, err := ignoreChkFn(mint, maxt, last, &chks[i])
   546  			if err != nil {
   547  				return nil, errors.Wrap(err, "ignore function")
   548  			}
   549  
   550  			if ignore {
   551  				continue OUTER
   552  			}
   553  		}
   554  
   555  		last = &chks[i]
   556  		repl = append(repl, chks[i])
   557  	}
   558  
   559  	return repl, nil
   560  }
   561  
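        // seriesRepair pairs a series' label set with its sanitized chunk metas while the block is rebuilt.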
   562  type seriesRepair struct {
   563  	lset labels.Labels
   564  	chks []chunks.Meta
   565  }
   566  
   567  // rewrite writes all data from the readers back into the writers while cleaning
   568  // up mis-ordered and duplicated chunks.
   569  func rewrite(
   570  	logger log.Logger,
   571  	indexr tsdb.IndexReader, chunkr tsdb.ChunkReader,
   572  	indexw tsdb.IndexWriter, chunkw tsdb.ChunkWriter,
   573  	meta *metadata.Meta,
   574  	ignoreChkFns []ignoreFnType,
   575  ) error {
   576  	symbols := indexr.Symbols()
   577  	for symbols.Next() {
   578  		if err := indexw.AddSymbol(symbols.At()); err != nil {
   579  			return errors.Wrap(err, "add symbol")
   580  		}
   581  	}
   582  	if symbols.Err() != nil {
   583  		return errors.Wrap(symbols.Err(), "next symbol")
   584  	}
   585  
   586  	all, err := indexr.Postings(index.AllPostingsKey())
   587  	if err != nil {
   588  		return errors.Wrap(err, "postings")
   589  	}
   590  	all = indexr.SortedPostings(all)
   591  
   592  	// We fully rebuild the postings list index from merged series.
   593  	var (
   594  		postings = index.NewMemPostings()
   595  		values   = map[string]stringset{}
   596  		i        = storage.SeriesRef(0)
   597  		series   = []seriesRepair{}
   598  	)
   599  
   600  	var builder labels.ScratchBuilder
   601  	var chks []chunks.Meta
   602  	for all.Next() {
   603  		id := all.At()
   604  
   605  		if err := indexr.Series(id, &builder, &chks); err != nil {
   606  			return errors.Wrap(err, "series")
   607  		}
   608  		// Make sure labels are in sorted order.
   609  		builder.Sort()
   610  
   611  		for i, c := range chks {
   612  			chks[i].Chunk, err = chunkr.Chunk(c)
   613  			if err != nil {
   614  				return errors.Wrap(err, "chunk read")
   615  			}
   616  		}
   617  
   618  		chks, err := sanitizeChunkSequence(chks, meta.MinTime, meta.MaxTime, ignoreChkFns)
   619  		if err != nil {
   620  			return err
   621  		}
   622  
   623  		if len(chks) == 0 {
   624  			continue
   625  		}
   626  
   627  		series = append(series, seriesRepair{
   628  			lset: builder.Labels(),
   629  			chks: chks,
   630  		})
   631  	}
   632  
   633  	if all.Err() != nil {
   634  		return errors.Wrap(all.Err(), "iterate series")
   635  	}
   636  
   637  	// Sort the series; if labels were re-ordered then the ordering of the series
   638  	// will be different.
   639  	sort.Slice(series, func(i, j int) bool {
   640  		return labels.Compare(series[i].lset, series[j].lset) < 0
   641  	})
   642  
   643  	lastSet := labels.Labels{}
   644  	// Build a new TSDB block.
   645  	for _, s := range series {
   646  		// The TSDB library will return an error if we add a series with
   647  		// labels identical to the last series. This means that we have
   648  		// discovered a duplicate time series in the old block. We drop
   649  		// all duplicate series, preserving the first one.
   650  		// TODO: Add metric to count dropped series if repair becomes a daemon
   651  		// rather than a batch job.
   652  		if labels.Compare(lastSet, s.lset) == 0 {
   653  			level.Warn(logger).Log("msg",
   654  				"dropping duplicate series in tsdb block found",
   655  				"labelset", s.lset.String(),
   656  			)
   657  			continue
   658  		}
   659  		if err := chunkw.WriteChunks(s.chks...); err != nil {
   660  			return errors.Wrap(err, "write chunks")
   661  		}
   662  		if err := indexw.AddSeries(i, s.lset, s.chks...); err != nil {
   663  			return errors.Wrap(err, "add series")
   664  		}
   665  
   666  		meta.Stats.NumChunks += uint64(len(s.chks))
   667  		meta.Stats.NumSeries++
   668  
   669  		for _, chk := range s.chks {
   670  			meta.Stats.NumSamples += uint64(chk.Chunk.NumSamples())
   671  		}
   672  
   673  		for _, l := range s.lset {
   674  			valset, ok := values[l.Name]
   675  			if !ok {
   676  				valset = stringset{}
   677  				values[l.Name] = valset
   678  			}
   679  			valset.set(l.Value)
   680  		}
   681  		postings.Add(i, s.lset)
   682  		i++
   683  		lastSet = s.lset
   684  	}
   685  	return nil
   686  }
   687  
   688  type stringset map[string]struct{}
   689  
   690  func (ss stringset) set(s string) {
   691  	ss[s] = struct{}{}
   692  }
   693  
   694  func (ss stringset) String() string {
   695  	return strings.Join(ss.slice(), ",")
   696  }
   697  
   698  func (ss stringset) slice() []string {
   699  	slice := make([]string, 0, len(ss))
   700  	for k := range ss {
   701  		slice = append(slice, k)
   702  	}
   703  	sort.Strings(slice)
   704  	return slice
   705  }