github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/compactor.go

package tsdb

import (
	"context"
	"fmt"
	"math"
	"os"
	"sync"
	"time"
	"unsafe"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/concurrency"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"

	"github.com/grafana/loki/pkg/logproto"
	"github.com/grafana/loki/pkg/storage/chunk"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/retention"
	index_shipper "github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
	"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
)

const readDBsConcurrency = 50

type indexProcessor struct{}

func NewIndexCompactor() compactor.IndexCompactor {
	return indexProcessor{}
}

func (i indexProcessor) NewTableCompactor(ctx context.Context, commonIndexSet compactor.IndexSet, existingUserIndexSet map[string]compactor.IndexSet, userIndexSetFactoryFunc compactor.MakeEmptyUserIndexSetFunc, periodConfig config.PeriodConfig) compactor.TableCompactor {
	return newTableCompactor(ctx, commonIndexSet, existingUserIndexSet, userIndexSetFactoryFunc, periodConfig)
}

func (i indexProcessor) OpenCompactedIndexFile(ctx context.Context, path, tableName, userID, workingDir string, periodConfig config.PeriodConfig, logger log.Logger) (compactor.CompactedIndex, error) {
	indexFile, err := OpenShippableTSDB(path)
	if err != nil {
		return nil, err
	}

	defer func() {
		if err := indexFile.Close(); err != nil {
			level.Error(logger).Log("msg", "failed to close index file", "err", err)
		}
	}()

	builder := NewBuilder()
	err = indexFile.(*TSDBFile).Index.(*TSDBIndex).forSeries(ctx, nil, func(lbls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
		builder.AddSeries(lbls.Copy(), fp, chks)
	}, labels.MustNewMatcher(labels.MatchEqual, "", ""))
	if err != nil {
		return nil, err
	}

	builder.chunksFinalized = true

	return newCompactedIndex(ctx, tableName, userID, workingDir, periodConfig, builder), nil
}

type tableCompactor struct {
	commonIndexSet          compactor.IndexSet
	existingUserIndexSet    map[string]compactor.IndexSet
	userIndexSetFactoryFunc compactor.MakeEmptyUserIndexSetFunc
	ctx                     context.Context
	periodConfig            config.PeriodConfig
	compactedIndexes        map[string]compactor.CompactedIndex
}

func newTableCompactor(
	ctx context.Context,
	commonIndexSet compactor.IndexSet,
	existingUserIndexSet map[string]compactor.IndexSet,
	userIndexSetFactoryFunc compactor.MakeEmptyUserIndexSetFunc,
	periodConfig config.PeriodConfig,
) *tableCompactor {
	return &tableCompactor{
		ctx:                     ctx,
		commonIndexSet:          commonIndexSet,
		existingUserIndexSet:    existingUserIndexSet,
		userIndexSetFactoryFunc: userIndexSetFactoryFunc,
		periodConfig:            periodConfig,
	}
}
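// Editorial sketch (not part of the original file): how this IndexCompactor is
// meant to be driven. The surrounding indexshipper compactor framework asks the
// IndexCompactor for a TableCompactor per table and then runs the compaction; the
// arguments here are assumed to be supplied by that framework, and the assumption
// that compactor.TableCompactor exposes CompactTable follows from the method
// defined on tableCompactor below.
func compactOneTable(
	ctx context.Context,
	commonIndexSet compactor.IndexSet,
	userIndexSets map[string]compactor.IndexSet,
	factory compactor.MakeEmptyUserIndexSetFunc,
	periodConfig config.PeriodConfig,
) error {
	tc := NewIndexCompactor().NewTableCompactor(ctx, commonIndexSet, userIndexSets, factory, periodConfig)
	return tc.CompactTable()
}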
func (t *tableCompactor) CompactTable() error {
	multiTenantIndexes := t.commonIndexSet.ListSourceFiles()

	var multiTenantIndices []Index
	indicesMtx := sync.Mutex{}

	// concurrently download and open all the multi-tenant indexes
	err := concurrency.ForEachJob(t.ctx, len(multiTenantIndexes), readDBsConcurrency, func(ctx context.Context, job int) error {
		downloadedAt, err := t.commonIndexSet.GetSourceFile(multiTenantIndexes[job])
		if err != nil {
			return err
		}

		defer func() {
			if err := os.Remove(downloadedAt); err != nil {
				level.Error(t.commonIndexSet.GetLogger()).Log("msg", "failed to remove downloaded index file", "path", downloadedAt, "err", err)
			}
		}()

		idx, err := OpenShippableTSDB(downloadedAt)
		if err != nil {
			return err
		}

		indicesMtx.Lock()
		defer indicesMtx.Unlock()
		multiTenantIndices = append(multiTenantIndices, idx.(Index))

		return nil
	})
	if err != nil {
		return err
	}

	var multiTenantIndex Index = NoopIndex{}
	if len(multiTenantIndices) > 0 {
		var err error
		multiTenantIndex, err = NewMultiIndex(multiTenantIndices...)
		if err != nil {
			return err
		}
	}

	// find all the user IDs in the multi-tenant indexes using TenantLabel.
	userIDs, err := multiTenantIndex.LabelValues(t.ctx, "", 0, math.MaxInt64, TenantLabel)
	if err != nil {
		return err
	}

	// go through all the users that have an index in the multi-tenant indexes and set up a builder for each user;
	// the builder combines the user's index from the multi-tenant indexes with the existing compacted index(es).
	t.compactedIndexes = make(map[string]compactor.CompactedIndex, len(userIDs))
	for _, userID := range userIDs {
		existingUserIndexSet, ok := t.existingUserIndexSet[userID]
		if !ok {
			var err error
			existingUserIndexSet, err = t.userIndexSetFactoryFunc(userID)
			if err != nil {
				return err
			}
		}

		builder, err := setupBuilder(t.ctx, userID, existingUserIndexSet, multiTenantIndices)
		if err != nil {
			return err
		}

		compactedIndex := newCompactedIndex(t.ctx, existingUserIndexSet.GetTableName(), userID, existingUserIndexSet.GetWorkingDir(), t.periodConfig, builder)
		t.compactedIndexes[userID] = compactedIndex

		if err := existingUserIndexSet.SetCompactedIndex(compactedIndex, true); err != nil {
			return err
		}
	}

	// go through existingUserIndexSet and find the sets that were not processed above (because they had
	// no updates in the multi-tenant indexes) but still have multiple index files in storage;
	// merge those into a single index file.
	for userID, srcIdxSet := range t.existingUserIndexSet {
		if _, ok := t.compactedIndexes[userID]; ok || len(srcIdxSet.ListSourceFiles()) <= 1 {
			continue
		}

		builder, err := setupBuilder(t.ctx, userID, srcIdxSet, []Index{})
		if err != nil {
			return err
		}

		compactedIndex := newCompactedIndex(t.ctx, srcIdxSet.GetTableName(), userID, srcIdxSet.GetWorkingDir(), t.periodConfig, builder)
		t.compactedIndexes[userID] = compactedIndex
		if err := srcIdxSet.SetCompactedIndex(compactedIndex, true); err != nil {
			return err
		}
	}

	if len(multiTenantIndices) > 0 {
		if err := t.commonIndexSet.SetCompactedIndex(nil, true); err != nil {
			return err
		}
	}
	return nil
}
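// Editorial sketch (not part of the original file): the bounded fan-out pattern
// CompactTable uses above. concurrency.ForEachJob (from grafana/dskit) runs the job
// function for indexes 0..jobs-1 with at most readDBsConcurrency goroutines and
// returns the first error. The files slice and process function are hypothetical
// stand-ins for the source index files and the download/open step.
func forEachSourceFile(ctx context.Context, files []string, process func(ctx context.Context, file string) error) error {
	return concurrency.ForEachJob(ctx, len(files), readDBsConcurrency, func(ctx context.Context, job int) error {
		return process(ctx, files[job])
	})
}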
// setupBuilder creates a Builder for a single user.
// It combines the user's index from multiTenantIndexes with the user's existing compacted index(es).
func setupBuilder(ctx context.Context, userID string, sourceIndexSet compactor.IndexSet, multiTenantIndexes []Index) (*Builder, error) {
	sourceIndexes := sourceIndexSet.ListSourceFiles()
	builder := NewBuilder()

	// add the user's index from the multi-tenant indexes to the builder
	for _, idx := range multiTenantIndexes {
		err := idx.(*TSDBFile).Index.(*TSDBIndex).forSeries(ctx, nil, func(lbls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
			builder.AddSeries(withoutTenantLabel(lbls.Copy()), fp, chks)
		}, withTenantLabelMatcher(userID, []*labels.Matcher{})...)
		if err != nil {
			return nil, err
		}
	}

	// download all the existing compacted indexes and add them to the builder
	for _, sourceIndex := range sourceIndexes {
		path, err := sourceIndexSet.GetSourceFile(sourceIndex)
		if err != nil {
			return nil, err
		}

		// note: this deferred cleanup (and the one below) runs when setupBuilder returns, not per loop iteration
		defer func() {
			if err := os.Remove(path); err != nil {
				level.Error(sourceIndexSet.GetLogger()).Log("msg", "error removing source index file", "err", err)
			}
		}()

		indexFile, err := OpenShippableTSDB(path)
		if err != nil {
			return nil, err
		}

		defer func() {
			if err := indexFile.Close(); err != nil {
				level.Error(sourceIndexSet.GetLogger()).Log("msg", "failed to close index file", "err", err)
			}
		}()

		// the empty matcher matches all series in the index
		err = indexFile.(*TSDBFile).Index.(*TSDBIndex).forSeries(ctx, nil, func(lbls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
			builder.AddSeries(lbls.Copy(), fp, chks)
		}, labels.MustNewMatcher(labels.MatchEqual, "", ""))
		if err != nil {
			return nil, err
		}
	}

	// finalize the chunks to remove duplicates and sort them
	builder.FinalizeChunks()

	return builder, nil
}

type compactedIndex struct {
	ctx           context.Context
	userID        string
	builder       *Builder
	workingDir    string
	tableInterval model.Interval
	periodConfig  config.PeriodConfig

	indexChunks     []chunk.Chunk
	deleteChunks    map[string][]index.ChunkMeta
	seriesToCleanup map[string]struct{}
}

func newCompactedIndex(ctx context.Context, tableName, userID, workingDir string, periodConfig config.PeriodConfig, builder *Builder) *compactedIndex {
	return &compactedIndex{
		ctx:             ctx,
		userID:          userID,
		builder:         builder,
		workingDir:      workingDir,
		periodConfig:    periodConfig,
		tableInterval:   retention.ExtractIntervalFromTableName(tableName),
		deleteChunks:    map[string][]index.ChunkMeta{},
		seriesToCleanup: map[string]struct{}{},
	}
}
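// Editorial sketch (not part of the original file): the tenant-label handling that
// setupBuilder relies on. withTenantLabelMatcher and withoutTenantLabel are defined
// elsewhere in this package; the functions below are hypothetical equivalents
// written against the behaviour visible here, where TenantLabel is the label under
// which multi-tenant indexes store the tenant ID.
func exampleWithTenantLabelMatcher(userID string, matchers []*labels.Matcher) []*labels.Matcher {
	// pin the series lookup to a single tenant inside the multi-tenant index
	return append(matchers, labels.MustNewMatcher(labels.MatchEqual, TenantLabel, userID))
}

func exampleWithoutTenantLabel(lbls labels.Labels) labels.Labels {
	// strip the synthetic tenant label before writing series to a per-tenant index
	b := labels.NewBuilder(lbls)
	b.Del(TenantLabel)
	return b.Labels()
}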
// ForEachChunk iterates over all the chunks in the builder and calls the callback function.
func (c *compactedIndex) ForEachChunk(ctx context.Context, callback retention.ChunkEntryCallback) error {
	schemaCfg := config.SchemaConfig{
		Configs: []config.PeriodConfig{c.periodConfig},
	}

	chunkEntry := retention.ChunkEntry{
		ChunkRef: retention.ChunkRef{
			UserID: getUnsafeBytes(c.userID),
		},
	}
	logprotoChunkRef := logproto.ChunkRef{
		UserID: c.userID,
	}
	for seriesID, stream := range c.builder.streams {
		logprotoChunkRef.Fingerprint = uint64(stream.fp)
		chunkEntry.SeriesID = getUnsafeBytes(seriesID)
		chunkEntry.Labels = withoutTenantLabel(stream.labels)

		for i := 0; i < len(stream.chunks) && ctx.Err() == nil; i++ {
			chk := stream.chunks[i]
			logprotoChunkRef.From = chk.From()
			logprotoChunkRef.Through = chk.Through()
			logprotoChunkRef.Checksum = chk.Checksum

			chunkEntry.ChunkID = getUnsafeBytes(schemaCfg.ExternalKey(logprotoChunkRef))
			chunkEntry.From = logprotoChunkRef.From
			chunkEntry.Through = logprotoChunkRef.Through

			deleteChunk, err := callback(chunkEntry)
			if err != nil {
				return err
			}

			if deleteChunk {
				// add the chunk to the list of chunks to delete; the deletion is taken care of while building the index.
				c.deleteChunks[seriesID] = append(c.deleteChunks[seriesID], chk)
			}
		}
	}

	return ctx.Err()
}

// IndexChunk adds the chunk to the list of chunks to index.
// Before accepting the chunk, it checks whether the chunk falls within the tableInterval and rejects it if not.
func (c *compactedIndex) IndexChunk(chk chunk.Chunk) (bool, error) {
	if chk.From > c.tableInterval.End || c.tableInterval.Start > chk.Through {
		return false, nil
	}

	c.indexChunks = append(c.indexChunks, chk)

	return true, nil
}

// CleanupSeries removes the series (including its chunks) from the builder and drops its list of chunks lined up for deletion.
func (c *compactedIndex) CleanupSeries(_ []byte, lbls labels.Labels) error {
	seriesID := lbls.String()
	if _, ok := c.builder.streams[seriesID]; !ok {
		return fmt.Errorf("series cleanup not allowed on non-existing series %s", seriesID)
	}
	delete(c.builder.streams, seriesID)
	delete(c.deleteChunks, seriesID)
	return nil
}

func (c *compactedIndex) Cleanup() {}
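// Editorial sketch (not part of the original file): the shape of a
// retention.ChunkEntryCallback as consumed by ForEachChunk above. The cutoff-based
// rule is a hypothetical policy; real callers plug in Loki's retention logic.
func exampleExpireBefore(cutoff model.Time) retention.ChunkEntryCallback {
	return func(entry retention.ChunkEntry) (deleteChunk bool, err error) {
		// request deletion when the chunk ends before the cutoff; ForEachChunk then
		// queues it in deleteChunks, and ToIndexFile drops it from the builder
		return entry.Through < cutoff, nil
	}
}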
// ToIndexFile creates an indexFile from the chunk metas stored in the builder.
// Before building the index, it applies the lined-up updates, i.e. deletion of chunks and addition of new chunks.
func (c *compactedIndex) ToIndexFile() (index_shipper.Index, error) {
	for seriesID, chks := range c.deleteChunks {
		for _, chk := range chks {
			chunkFound, err := c.builder.DropChunk(seriesID, chk)
			if err != nil {
				return nil, err
			}
			if !chunkFound {
				return nil, fmt.Errorf("could not drop non-existent chunk %x from series %s", chk, seriesID)
			}
		}
	}
	c.deleteChunks = nil

	for _, chk := range c.indexChunks {
		err := c.builder.InsertChunk(chk.Metric.String(), index.ChunkMeta{
			Checksum: chk.Checksum,
			MinTime:  int64(chk.From),
			MaxTime:  int64(chk.Through),
			KB:       uint32(chk.Size()) / (1 << 10),
			Entries:  uint32(chk.Data.Entries()),
		})
		if err != nil {
			return nil, err
		}
	}
	c.indexChunks = nil

	id, err := c.builder.Build(c.ctx, c.workingDir, func(from, through model.Time, checksum uint32) Identifier {
		id := SingleTenantTSDBIdentifier{
			TS:       time.Now(),
			From:     from,
			Through:  through,
			Checksum: checksum,
		}
		return newPrefixedIdentifier(id, c.workingDir, "")
	})
	if err != nil {
		return nil, err
	}

	return NewShippableTSDBFile(id, false)
}

// getUnsafeBytes returns the string's underlying bytes without copying.
// The returned slice aliases the string's memory: it must be treated as read-only
// and must not outlive the string it was derived from.
func getUnsafeBytes(s string) []byte {
	return *((*[]byte)(unsafe.Pointer(&s)))
}
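// Editorial sketch (not part of the original file): a copying equivalent of
// getUnsafeBytes. The unsafe variant above avoids an allocation per chunk entry,
// which matters in the hot ForEachChunk loop, but the plain conversion below is
// the safe choice whenever the bytes might be mutated or retained.
func getSafeBytes(s string) []byte {
	return []byte(s) // copies, but yields an independently owned, mutable slice
}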