github.com/thanos-io/thanos@v0.32.5/pkg/verifier/duplicated_compaction.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package verifier

import (
	"fmt"
	"strings"
	"time"

	"github.com/go-kit/log/level"
	"github.com/oklog/ulid"
	"github.com/pkg/errors"
	"github.com/prometheus/prometheus/tsdb"
)

// DuplicatedCompactionBlocks is for the bug fixed in https://github.com/thanos-io/thanos/commit/94e26c63e52ba45b713fd998638d0e7b2492664f.
// The bug resulted in source blocks not being removed immediately after compaction, so the same sources were compacted
// again and again until the sync-delay passed.
// The expected output of this check is overlapping blocks with exactly the same sources, time ranges and stats.
// If repair is enabled, all but one block in each set of duplicates are safely deleted.
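//
// For illustration, this verifier is selected by its issue ID via the
// thanos tools bucket verify subcommand. The flag names below are assumed and
// may differ between versions; consult --help for yours:
//
//	thanos tools bucket verify --issues=duplicated_compaction --repair \
//	  --objstore.config-file=bucket.yaml --objstore-backup.config-file=backup.yaml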
type DuplicatedCompactionBlocks struct{}

// IssueID returns the identifier under which this verifier is registered and selected.
func (DuplicatedCompactionBlocks) IssueID() string { return "duplicated_compaction" }

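// VerifyRepair looks for overlapping blocks and flags groups whose members have
// identical time ranges, compaction levels, sources and stats as exact duplicates.
// When repair is true, every duplicate except the first of each group is backed
// up and then deleted.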
func (DuplicatedCompactionBlocks) VerifyRepair(ctx Context, idMatcher func(ulid.ULID) bool, repair bool) error {
	if idMatcher != nil {
		return errors.Errorf("id matching is not supported")
	}

	level.Info(ctx.Logger).Log("msg", "started verifying issue", "with-repair", repair)

	overlaps, err := fetchOverlaps(ctx, ctx.Fetcher)
	if err != nil {
		return errors.Wrap(err, "fetch overlaps")
	}

	if len(overlaps) == 0 {
		// All good.
		return nil
	}

	// We have overlaps; check whether they include exact duplicates. If yes, collect those into distinct sets.
	var (
		toKillLookup = map[ulid.ULID]struct{}{}
		toKill       []ulid.ULID
	)

	// Loop over label-resolution groups.
	for k, o := range overlaps {
		// Loop over overlap group.
		for r, blocks := range o {
			dups := duplicatedBlocks(blocks)

			// Loop over duplicate sets.
			for _, d := range dups {
				level.Warn(ctx.Logger).Log("msg", "found duplicated blocks", "group", k, "range-min", r.Min, "range-max", r.Max, "kill", sprintMetas(d[1:]))

				for _, m := range d[1:] {
					if _, ok := toKillLookup[m.ULID]; ok {
						continue
					}

					toKillLookup[m.ULID] = struct{}{}
					toKill = append(toKill, m.ULID)
				}
			}

			if len(dups) == 0 {
				level.Warn(ctx.Logger).Log("msg", "found overlapped blocks, but all of the blocks are unique. Seems like unrelated issue. Ignoring overlap", "group", k,
					"range", fmt.Sprintf("%v", r), "overlap", sprintMetas(blocks))
			}
		}
	}

	level.Warn(ctx.Logger).Log("msg", "Found duplicated blocks that are ok to be removed", "ULIDs", fmt.Sprintf("%v", toKill), "num", len(toKill))
	if !repair {
		return nil
	}

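	// Back up each duplicate before deleting it from the inspected bucket, so the
	// removal is recoverable.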
	for i, id := range toKill {
		if err := BackupAndDelete(ctx, id); err != nil {
			return err
		}
		level.Info(ctx.Logger).Log("msg", "Removed duplicated block", "id", id, "to-be-removed", len(toKill)-(i+1), "removed", i+1)
	}

	level.Info(ctx.Logger).Log("msg", "Removed all duplicated blocks. You might want to rerun this verify to check if there is still any unrelated overlap")
	return nil
}

// duplicatedBlocks returns sets of duplicated blocks that have exactly the same time range, sources and stats.
// Blocks that are unique are not included in the result.
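//
// A minimal sketch of the grouping behaviour (hypothetical metas; note that ULIDs
// are deliberately ignored when comparing, since duplicates differ only by ULID):
//
//	m := tsdb.BlockMeta{MinTime: 0, MaxTime: 7200000} // 2h block
//	duplicatedBlocks([]tsdb.BlockMeta{m, m})          // -> one set containing both metas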
func duplicatedBlocks(blocks []tsdb.BlockMeta) (res [][]tsdb.BlockMeta) {
	var dups [][]tsdb.BlockMeta
	for _, b := range blocks {
		added := false
		// Compare b against the representative (first) meta of each existing set.
		for i, d := range dups {
			if d[0].MinTime != b.MinTime || d[0].MaxTime != b.MaxTime {
				continue
			}

			if d[0].Compaction.Level != b.Compaction.Level {
				continue
			}

			if !sameULIDSlices(d[0].Compaction.Sources, b.Compaction.Sources) {
				continue
			}

			if d[0].Stats != b.Stats {
				continue
			}

			dups[i] = append(dups[i], b)
			added = true
			break
		}

		if !added {
			dups = append(dups, []tsdb.BlockMeta{b})
		}
	}

	// Keep only sets with at least two members, i.e. actual duplicates.
	for _, d := range dups {
		if len(d) < 2 {
			continue
		}
		res = append(res, d)
	}
	return res
}

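// sameULIDSlices reports whether a and b contain the same ULIDs in the same order.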
func sameULIDSlices(a, b []ulid.ULID) bool {
	if len(a) != len(b) {
		return false
	}

	for i, m := range a {
		if m.Compare(b[i]) != 0 {
			return false
		}
	}
	return true
}

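// sprintMetas formats block metas into a single human-readable string for logging.
// Note that mint/maxt are Unix timestamps in milliseconds, hence the /1000 when
// computing the block's range as a duration.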
func sprintMetas(ms []tsdb.BlockMeta) string {
	var infos []string
	for _, m := range ms {
		infos = append(infos, fmt.Sprintf("<ulid: %s, mint: %d, maxt: %d, range: %s>", m.ULID, m.MinTime, m.MaxTime, (time.Duration((m.MaxTime-m.MinTime)/1000)*time.Second).String()))
	}
	return fmt.Sprintf("blocks: %d, [%s]", len(ms), strings.Join(infos, ","))
}