github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/builder.go (about) 1 package tsdb 2 3 import ( 4 "context" 5 "fmt" 6 "math/rand" 7 "os" 8 "path/filepath" 9 "sort" 10 11 "github.com/prometheus/common/model" 12 "github.com/prometheus/prometheus/model/labels" 13 "github.com/prometheus/prometheus/storage" 14 15 chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util" 16 "github.com/grafana/loki/pkg/storage/stores/tsdb/index" 17 ) 18 19 // Builder is a helper used to create tsdb indices. 20 // It can accept streams in any order and will create the tsdb 21 // index appropriately via `Build()` 22 // It can even receive multiple writes for the same stream with the caveat 23 // that chunks must be added in order and not duplicated 24 type Builder struct { 25 streams map[string]*stream 26 chunksFinalized bool 27 } 28 29 type stream struct { 30 labels labels.Labels 31 fp model.Fingerprint 32 chunks index.ChunkMetas 33 } 34 35 func NewBuilder() *Builder { 36 return &Builder{streams: make(map[string]*stream)} 37 } 38 39 func (b *Builder) AddSeries(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) { 40 id := ls.String() 41 s, ok := b.streams[id] 42 if !ok { 43 s = &stream{ 44 labels: ls, 45 fp: fp, 46 } 47 b.streams[id] = s 48 } 49 50 s.chunks = append(s.chunks, chks...) 51 } 52 53 func (b *Builder) FinalizeChunks() { 54 for id := range b.streams { 55 b.streams[id].chunks = b.streams[id].chunks.Finalize() 56 } 57 b.chunksFinalized = true 58 } 59 60 func (b *Builder) InsertChunk(streamID string, chk index.ChunkMeta) error { 61 if !b.chunksFinalized { 62 return fmt.Errorf("chunk insertion is only allowed on finalized chunks") 63 } 64 65 s, ok := b.streams[streamID] 66 if !ok { 67 return fmt.Errorf("chunk insertion is only allowed on existing streams") 68 } 69 70 s.chunks = s.chunks.Add(chk) 71 return nil 72 } 73 74 func (b *Builder) DropChunk(streamID string, chk index.ChunkMeta) (bool, error) { 75 if !b.chunksFinalized { 76 return false, fmt.Errorf("dropping of chunk is only allowed on finalized chunks") 77 } 78 79 s, ok := b.streams[streamID] 80 if !ok { 81 return false, fmt.Errorf("dropping of chunk is only allowed on existing streams") 82 } 83 84 var chunkFound bool 85 s.chunks, chunkFound = s.chunks.Drop(chk) 86 return chunkFound, nil 87 } 88 89 func (b *Builder) Build( 90 ctx context.Context, 91 scratchDir string, 92 // Determines how to create the resulting Identifier and file name. 93 // This is variable as we use Builder for multiple reasons, 94 // such as building multi-tenant tsdbs on the ingester 95 // and per tenant ones during compaction 96 createFn func(from, through model.Time, checksum uint32) Identifier, 97 ) (id Identifier, err error) { 98 // Ensure the parent dir exists (i.e. index/<bucket>/<tenant>/) 99 if scratchDir != "" { 100 if err := chunk_util.EnsureDirectory(scratchDir); err != nil { 101 return id, err 102 } 103 } 104 105 // First write tenant/index-bounds-random.staging 106 rng := rand.Int63() 107 name := fmt.Sprintf("%s-%x.staging", index.IndexFilename, rng) 108 tmpPath := filepath.Join(scratchDir, name) 109 110 writer, err := index.NewWriter(ctx, tmpPath) 111 if err != nil { 112 return id, err 113 } 114 // TODO(owen-d): multithread 115 116 // Sort series 117 streams := make([]*stream, 0, len(b.streams)) 118 for _, s := range b.streams { 119 streams = append(streams, s) 120 } 121 122 // Use the supplied fingerprints instead of hashing labels for two reasons: 123 // 1) Correctness: fingerprints differ from label hashes because 124 // we add a synthesized __loki_tennat__ label, which is eventually compacted away. 125 // 2) Speed: No hashing required 126 sort.Slice(streams, func(i, j int) bool { 127 if streams[i].fp != streams[j].fp { 128 return streams[i].fp < streams[j].fp 129 } 130 return labels.Compare(streams[i].labels, streams[j].labels) < 0 131 }) 132 133 // Build symbols 134 symbolsMap := make(map[string]struct{}) 135 for _, s := range streams { 136 for _, l := range s.labels { 137 symbolsMap[l.Name] = struct{}{} 138 symbolsMap[l.Value] = struct{}{} 139 } 140 } 141 142 // Sort symbols 143 symbols := make([]string, 0, len(symbolsMap)) 144 for s := range symbolsMap { 145 symbols = append(symbols, s) 146 } 147 sort.Strings(symbols) 148 149 // Add symbols 150 for _, symbol := range symbols { 151 if err := writer.AddSymbol(symbol); err != nil { 152 return id, err 153 } 154 } 155 156 // Add series 157 for i, s := range streams { 158 if !b.chunksFinalized { 159 s.chunks = s.chunks.Finalize() 160 } 161 if err := writer.AddSeries(storage.SeriesRef(i), s.labels, s.fp, s.chunks...); err != nil { 162 return id, err 163 } 164 } 165 166 if err := writer.Close(); err != nil { 167 return id, err 168 } 169 170 reader, err := index.NewFileReader(tmpPath) 171 if err != nil { 172 return id, err 173 } 174 175 from, through := reader.Bounds() 176 177 // load the newly compacted index to grab checksum, promptly close 178 dst := createFn(model.Time(from), model.Time(through), reader.Checksum()) 179 180 reader.Close() 181 defer func() { 182 if err != nil { 183 os.RemoveAll(tmpPath) 184 } 185 }() 186 187 if err := chunk_util.EnsureDirectory(filepath.Dir(dst.Path())); err != nil { 188 return id, err 189 } 190 dstPath := dst.Path() 191 if err := os.Rename(tmpPath, dstPath); err != nil { 192 return id, err 193 } 194 195 return dst, nil 196 }