github.com/thanos-io/thanos@v0.32.5/pkg/verifier/duplicated_compaction.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package verifier 5 6 import ( 7 "fmt" 8 "strings" 9 "time" 10 11 "github.com/go-kit/log/level" 12 "github.com/oklog/ulid" 13 "github.com/pkg/errors" 14 "github.com/prometheus/prometheus/tsdb" 15 ) 16 17 // DuplicatedCompactionBlocks is for bug fixed in https://github.com/thanos-io/thanos/commit/94e26c63e52ba45b713fd998638d0e7b2492664f. 18 // Bug resulted in source block not being removed immediately after compaction, so we were compacting again and again same sources 19 // until sync-delay passes. 20 // The expected print of this are same overlapped blocks with exactly the same sources, time ranges and stats. 21 // If repair is enabled, all but one duplicates are safely deleted. 22 type DuplicatedCompactionBlocks struct{} 23 24 func (DuplicatedCompactionBlocks) IssueID() string { return "duplicated_compaction" } 25 26 func (DuplicatedCompactionBlocks) VerifyRepair(ctx Context, idMatcher func(ulid.ULID) bool, repair bool) error { 27 if idMatcher != nil { 28 return errors.Errorf("id matching is not supported") 29 } 30 31 level.Info(ctx.Logger).Log("msg", "started verifying issue", "with-repair", repair) 32 33 overlaps, err := fetchOverlaps(ctx, ctx.Fetcher) 34 if err != nil { 35 return errors.Wrap(err, "fetch overlaps") 36 } 37 38 if len(overlaps) == 0 { 39 // All good. 40 return nil 41 } 42 43 // We have overlaps, let's see if they include exact duplicates. If yes, let's put them into distinct set. 44 var ( 45 toKillLookup = map[ulid.ULID]struct{}{} 46 toKill []ulid.ULID 47 ) 48 49 // Loop over label-resolution groups. 50 for k, o := range overlaps { 51 // Loop over overlap group. 52 for r, blocks := range o { 53 dups := duplicatedBlocks(blocks) 54 55 // Loop over duplicates sets. 56 for _, d := range dups { 57 level.Warn(ctx.Logger).Log("msg", "found duplicated blocks", "group", k, "range-min", r.Min, "range-max", r.Max, "kill", sprintMetas(d[1:])) 58 59 for _, m := range d[1:] { 60 if _, ok := toKillLookup[m.ULID]; ok { 61 continue 62 } 63 64 toKillLookup[m.ULID] = struct{}{} 65 toKill = append(toKill, m.ULID) 66 } 67 } 68 69 if len(dups) == 0 { 70 level.Warn(ctx.Logger).Log("msg", "found overlapped blocks, but all of the blocks are unique. Seems like unrelated issue. Ignoring overlap", "group", k, 71 "range", fmt.Sprintf("%v", r), "overlap", sprintMetas(blocks)) 72 } 73 } 74 } 75 76 level.Warn(ctx.Logger).Log("msg", "Found duplicated blocks that are ok to be removed", "ULIDs", fmt.Sprintf("%v", toKill), "num", len(toKill)) 77 if !repair { 78 return nil 79 } 80 81 for i, id := range toKill { 82 if err := BackupAndDelete(ctx, id); err != nil { 83 return err 84 } 85 level.Info(ctx.Logger).Log("msg", "Removed duplicated block", "id", id, "to-be-removed", len(toKill)-(i+1), "removed", i+1) 86 } 87 88 level.Info(ctx.Logger).Log("msg", "Removed all duplicated blocks. You might want to rerun this verify to check if there is still any unrelated overlap") 89 return nil 90 } 91 92 // duplicatedBlocks returns duplicated blocks that have exactly same range, sources and stats. 93 // If block is unique it is not included in the resulted blocs. 94 func duplicatedBlocks(blocks []tsdb.BlockMeta) (res [][]tsdb.BlockMeta) { 95 var dups [][]tsdb.BlockMeta 96 for _, b := range blocks { 97 added := false 98 for i, d := range dups { 99 if d[0].MinTime != b.MinTime || d[0].MaxTime != b.MaxTime { 100 continue 101 } 102 103 if d[0].Compaction.Level != b.Compaction.Level { 104 continue 105 } 106 107 if !sameULIDSlices(d[0].Compaction.Sources, b.Compaction.Sources) { 108 continue 109 } 110 111 if d[0].Stats != b.Stats { 112 continue 113 } 114 115 dups[i] = append(dups[i], b) 116 added = true 117 break 118 } 119 120 if !added { 121 dups = append(dups, []tsdb.BlockMeta{b}) 122 } 123 } 124 125 for _, d := range dups { 126 if len(d) < 2 { 127 continue 128 } 129 res = append(res, d) 130 } 131 return res 132 } 133 134 func sameULIDSlices(a, b []ulid.ULID) bool { 135 if len(a) != len(b) { 136 return false 137 } 138 139 for i, m := range a { 140 if m.Compare(b[i]) != 0 { 141 return false 142 } 143 } 144 return true 145 } 146 147 func sprintMetas(ms []tsdb.BlockMeta) string { 148 var infos []string 149 for _, m := range ms { 150 infos = append(infos, fmt.Sprintf("<ulid: %s, mint: %d, maxt: %d, range: %s>", m.ULID, m.MinTime, m.MaxTime, (time.Duration((m.MaxTime-m.MinTime)/1000)*time.Second).String())) 151 } 152 return fmt.Sprintf("blocks: %d, [%s]", len(ms), strings.Join(infos, ",")) 153 }