github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/compactor/table.go

package compactor

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"sync"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/concurrency"
	"github.com/prometheus/common/model"

	chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/retention"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/storage"
	util_log "github.com/grafana/loki/pkg/util/log"
)

const (
	uploadIndexSetsConcurrency = 10
	gzipExtension              = ".gz"
)

var errRetentionFileCountNotOne = fmt.Errorf("can't apply retention when index file count is not one")

type tableExpirationChecker interface {
	IntervalMayHaveExpiredChunks(interval model.Interval, userID string) bool
}

type IndexCompactor interface {
	// NewTableCompactor returns a new TableCompactor for compacting a table.
	// commonIndexSet refers to the common index files, in other words the multi-tenant index.
	// existingUserIndexSet refers to the existing user-specific index files in storage.
	// makeEmptyUserIndexSetFunc can be used to create an empty indexSet for a user
	// who does not have an index in existingUserIndexSet.
	// periodConfig holds the PeriodConfig for the table.
	NewTableCompactor(
		ctx context.Context,
		commonIndexSet IndexSet,
		existingUserIndexSet map[string]IndexSet,
		makeEmptyUserIndexSetFunc MakeEmptyUserIndexSetFunc,
		periodConfig config.PeriodConfig,
	) TableCompactor

	// OpenCompactedIndexFile opens a compressed index file at the given path.
	OpenCompactedIndexFile(
		ctx context.Context,
		path,
		tableName,
		userID,
		workingDir string,
		periodConfig config.PeriodConfig,
		logger log.Logger,
	) (
		CompactedIndex,
		error,
	)
}

type TableCompactor interface {
	// CompactTable compacts the table.
	// After compaction is done successfully, it should set the new/updated CompactedIndex for the relevant IndexSets.
	CompactTable() (err error)
}

type MakeEmptyUserIndexSetFunc func(userID string) (IndexSet, error)

type table struct {
	name               string
	workingDirectory   string
	indexStorageClient storage.Client
	indexCompactor     IndexCompactor
	tableMarker        retention.TableMarker
	expirationChecker  tableExpirationChecker
	periodConfig       config.PeriodConfig

	baseUserIndexSet, baseCommonIndexSet storage.IndexSet

	indexSets             map[string]*indexSet
	usersWithPerUserIndex []string
	logger                log.Logger

	ctx context.Context
}

func newTable(ctx context.Context, workingDirectory string, indexStorageClient storage.Client,
	indexCompactor IndexCompactor, periodConfig config.PeriodConfig,
	tableMarker retention.TableMarker, expirationChecker tableExpirationChecker,
) (*table, error) {
	err := chunk_util.EnsureDirectory(workingDirectory)
	if err != nil {
		return nil, err
	}

	table := table{
		ctx:                ctx,
		name:               filepath.Base(workingDirectory),
		workingDirectory:   workingDirectory,
		indexStorageClient: indexStorageClient,
		indexCompactor:     indexCompactor,
		tableMarker:        tableMarker,
		expirationChecker:  expirationChecker,
		periodConfig:       periodConfig,
		indexSets:          map[string]*indexSet{},
		baseUserIndexSet:   storage.NewIndexSet(indexStorageClient, true),
		baseCommonIndexSet: storage.NewIndexSet(indexStorageClient, false),
	}
	table.logger = log.With(util_log.Logger, "table-name", table.name)

	return &table, nil
}

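// A minimal usage sketch, assuming the caller (for example the compactor run
// loop) already has the concrete dependencies wired up; the variable names
// below (workingDir, tableName, indexStorageClient, indexCompactor,
// periodConfig, tableMarker, expirationChecker) are placeholders and not part
// of this file:
//
//	tbl, err := newTable(ctx, filepath.Join(workingDir, tableName), indexStorageClient,
//		indexCompactor, periodConfig, tableMarker, expirationChecker)
//	if err != nil {
//		return err
//	}
//	// compact the table and, in the same pass, apply retention
//	if err := tbl.compact(true); err != nil {
//		return err
//	}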

func (t *table) compact(applyRetention bool) error {
	indexFiles, usersWithPerUserIndex, err := t.indexStorageClient.ListFiles(t.ctx, t.name, false)
	if err != nil {
		return err
	}

	if len(indexFiles) == 0 && len(usersWithPerUserIndex) == 0 {
		level.Info(t.logger).Log("msg", "no common index files and user index found")
		return nil
	}

	t.usersWithPerUserIndex = usersWithPerUserIndex

	level.Info(t.logger).Log("msg", "listed files", "count", len(indexFiles))

	defer func() {
		for _, is := range t.indexSets {
			is.cleanup()
		}

		if err := os.RemoveAll(t.workingDirectory); err != nil {
			level.Error(t.logger).Log("msg", fmt.Sprintf("failed to remove working directory %s", t.workingDirectory), "err", err)
		}
	}()

	t.indexSets[""], err = newCommonIndexSet(t.ctx, t.name, t.baseCommonIndexSet, t.workingDirectory, t.logger)
	if err != nil {
		return err
	}

	// userIndexSets is just for passing to NewTableCompactor since Go considers map[string]*indexSet a different type than map[string]IndexSet
	userIndexSets := make(map[string]IndexSet, len(t.usersWithPerUserIndex))

	for _, userID := range t.usersWithPerUserIndex {
		var err error
		t.indexSets[userID], err = newUserIndexSet(t.ctx, t.name, userID, t.baseUserIndexSet, filepath.Join(t.workingDirectory, userID), t.logger)
		if err != nil {
			return err
		}
		userIndexSets[userID] = t.indexSets[userID]
	}

	// protect indexSets with a mutex so that we stay concurrency safe if the TableCompactor calls MakeEmptyUserIndexSetFunc concurrently
	indexSetsMtx := sync.Mutex{}
	tableCompactor := t.indexCompactor.NewTableCompactor(t.ctx, t.indexSets[""], userIndexSets, func(userID string) (IndexSet, error) {
		indexSetsMtx.Lock()
		defer indexSetsMtx.Unlock()

		var err error
		t.indexSets[userID], err = newUserIndexSet(t.ctx, t.name, userID, t.baseUserIndexSet, filepath.Join(t.workingDirectory, userID), t.logger)
		return t.indexSets[userID], err
	}, t.periodConfig)

	err = tableCompactor.CompactTable()
	if err != nil {
		return err
	}

	if applyRetention {
		err := t.applyRetention()
		if err != nil {
			return err
		}
	}

	return t.done()
}

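// Illustrative sketch (not part of this file) of how a TableCompactor
// implementation might use the makeEmptyUserIndexSetFunc built above when it
// moves a tenant's entries out of the common index and no per-user index set
// exists for that tenant yet; per the TableCompactor contract it would then
// set the resulting CompactedIndex on that IndexSet so that done() can upload it:
//
//	userIdxSet, ok := existingUserIndexSet[userID]
//	if !ok {
//		var err error
//		userIdxSet, err = makeEmptyUserIndexSetFunc(userID)
//		if err != nil {
//			return err
//		}
//	}
//	// ... compact the tenant's index records into userIdxSet ...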

func (t *table) done() error {
	userIDs := make([]string, 0, len(t.indexSets))
	for userID := range t.indexSets {
		// indexSet.done() uploads the compacted db and cleans up the source index files.
		// For user index sets, the files from common index sets are also a source of index.
		// If we clean up common index sets first and then fail to upload the newly compacted dbs in user index sets, we will lose data.
		// To avoid any data loss, we should call done() on common index sets at the end.
		if userID == "" {
			continue
		}

		userIDs = append(userIDs, userID)
	}

	err := concurrency.ForEachJob(t.ctx, len(userIDs), uploadIndexSetsConcurrency, func(ctx context.Context, idx int) error {
		return t.indexSets[userIDs[idx]].done()
	})
	if err != nil {
		return err
	}

	if commonIndexSet, ok := t.indexSets[""]; ok {
		if err := commonIndexSet.done(); err != nil {
			return err
		}
	}

	return nil
}

// applyRetention applies retention on the index sets
func (t *table) applyRetention() error {
	tableInterval := retention.ExtractIntervalFromTableName(t.name)
	// call runRetention on the index sets which may have expired chunks
	for userID, is := range t.indexSets {
		// make sure we do not apply retention on the common index set which got compacted away to a per-user index
		if userID == "" && is.compactedIndex == nil && is.removeSourceObjects && !is.uploadCompactedDB {
			continue
		}

		if !t.expirationChecker.IntervalMayHaveExpiredChunks(tableInterval, userID) {
			continue
		}

		// compactedIndex is only set in indexSet when files have been compacted,
		// so we need to open the compacted index file to apply retention if compactedIndex is nil
		if is.compactedIndex == nil && len(is.ListSourceFiles()) == 1 {
			if err := t.openCompactedIndexForRetention(is); err != nil {
				return err
			}
		}

		err := is.runRetention(t.tableMarker)
		if err != nil {
			return err
		}
	}

	return nil
}

func (t *table) openCompactedIndexForRetention(idxSet *indexSet) error {
	sourceFiles := idxSet.ListSourceFiles()
	if len(sourceFiles) != 1 {
		return errRetentionFileCountNotOne
	}

	downloadedAt, err := idxSet.GetSourceFile(sourceFiles[0])
	if err != nil {
		return err
	}

	compactedIndexFile, err := t.indexCompactor.OpenCompactedIndexFile(t.ctx, downloadedAt, t.name, idxSet.userID, filepath.Join(t.workingDirectory, idxSet.userID), t.periodConfig, idxSet.logger)
	if err != nil {
		return err
	}

	idxSet.setCompactedIndex(compactedIndexFile, false, false)

	return nil
}

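// A minimal tableExpirationChecker sketch (hypothetical, e.g. for tests): it
// reports every interval and user as possibly having expired chunks, so
// applyRetention above would attempt retention on every index set it visits.
//
//	type alwaysExpiredChecker struct{}
//
//	func (alwaysExpiredChecker) IntervalMayHaveExpiredChunks(_ model.Interval, _ string) bool {
//		return true
//	}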