github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/downloads/index_set.go (about) 1 package downloads 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "io/ioutil" 9 "os" 10 "path/filepath" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/go-kit/log" 16 "github.com/go-kit/log/level" 17 "github.com/grafana/dskit/concurrency" 18 19 "github.com/grafana/loki/pkg/storage/chunk/client/util" 20 "github.com/grafana/loki/pkg/storage/stores/indexshipper/index" 21 "github.com/grafana/loki/pkg/storage/stores/indexshipper/storage" 22 util_log "github.com/grafana/loki/pkg/util/log" 23 "github.com/grafana/loki/pkg/util/spanlogger" 24 ) 25 26 const ( 27 gzipExtension = ".gz" 28 maxSyncRetries = 1 29 ) 30 31 var errIndexListCacheTooStale = fmt.Errorf("index list cache too stale") 32 33 type IndexSet interface { 34 Init(forQuerying bool) error 35 Close() 36 ForEach(ctx context.Context, callback index.ForEachIndexCallback) error 37 DropAllDBs() error 38 Err() error 39 LastUsedAt() time.Time 40 UpdateLastUsedAt() 41 Sync(ctx context.Context) (err error) 42 AwaitReady(ctx context.Context) error 43 } 44 45 // indexSet is a collection of multiple files created for a same table by various ingesters. 46 // All the public methods are concurrency safe and take care of mutexes to avoid any data race. 47 type indexSet struct { 48 openIndexFileFunc index.OpenIndexFileFunc 49 baseIndexSet storage.IndexSet 50 tableName, userID string 51 cacheLocation string 52 logger log.Logger 53 54 lastUsedAt time.Time 55 index map[string]index.Index 56 indexMtx *mtxWithReadiness 57 err error 58 59 cancelFunc context.CancelFunc // helps with cancellation of initialization if we are asked to stop. 60 } 61 62 func NewIndexSet(tableName, userID, cacheLocation string, baseIndexSet storage.IndexSet, openIndexFileFunc index.OpenIndexFileFunc, 63 logger log.Logger) (IndexSet, error) { 64 if baseIndexSet.IsUserBasedIndexSet() && userID == "" { 65 return nil, fmt.Errorf("userID must not be empty") 66 } else if !baseIndexSet.IsUserBasedIndexSet() && userID != "" { 67 return nil, fmt.Errorf("userID must be empty") 68 } 69 70 err := util.EnsureDirectory(cacheLocation) 71 if err != nil { 72 return nil, err 73 } 74 75 is := indexSet{ 76 openIndexFileFunc: openIndexFileFunc, 77 baseIndexSet: baseIndexSet, 78 tableName: tableName, 79 userID: userID, 80 cacheLocation: cacheLocation, 81 logger: logger, 82 lastUsedAt: time.Now(), 83 index: map[string]index.Index{}, 84 indexMtx: newMtxWithReadiness(), 85 cancelFunc: func() {}, 86 } 87 88 return &is, nil 89 } 90 91 // Init downloads all the db files for the table from object storage. 92 func (t *indexSet) Init(forQuerying bool) (err error) { 93 // Using background context to avoid cancellation of download when request times out. 94 // We would anyways need the files for serving next requests. 95 ctx, cancelFunc := context.WithTimeout(context.Background(), downloadTimeout) 96 t.cancelFunc = cancelFunc 97 98 logger := spanlogger.FromContextWithFallback(ctx, t.logger) 99 100 defer func() { 101 if err != nil { 102 level.Error(t.logger).Log("msg", fmt.Sprintf("failed to initialize table %s, cleaning it up", t.tableName), "err", err) 103 t.err = err 104 105 // cleaning up files due to error to avoid returning invalid results. 106 for fileName := range t.index { 107 if err := t.cleanupDB(fileName); err != nil { 108 level.Error(t.logger).Log("msg", "failed to cleanup partially downloaded file", "filename", fileName, "err", err) 109 } 110 } 111 } 112 t.cancelFunc() 113 t.indexMtx.markReady() 114 }() 115 116 filesInfo, err := ioutil.ReadDir(t.cacheLocation) 117 if err != nil { 118 return err 119 } 120 121 // open all the locally present files first to avoid downloading them again during sync operation below. 122 for _, fileInfo := range filesInfo { 123 if fileInfo.IsDir() { 124 continue 125 } 126 127 fullPath := filepath.Join(t.cacheLocation, fileInfo.Name()) 128 // if we fail to open an index file, lets skip it and let sync operation re-download the file from storage. 129 idx, err := t.openIndexFileFunc(fullPath) 130 if err != nil { 131 level.Error(t.logger).Log("msg", fmt.Sprintf("failed to open existing index file %s, removing the file and continuing without it to let the sync operation catch up", fullPath), "err", err) 132 // Sometimes files get corrupted when the process gets killed in the middle of a download operation which can cause problems in reading the file. 133 // Implementation of openIndexFileFunc should take care of gracefully handling corrupted files. 134 // Let us just remove the file and let the sync operation re-download it. 135 if err := os.Remove(fullPath); err != nil { 136 level.Error(t.logger).Log("msg", fmt.Sprintf("failed to remove index file %s which failed to open", fullPath)) 137 } 138 continue 139 } 140 141 t.index[fileInfo.Name()] = idx 142 } 143 144 level.Debug(logger).Log("msg", fmt.Sprintf("opened %d local files, now starting sync operation", len(t.index))) 145 146 // sync the table to get new files and remove the deleted ones from storage. 147 err = t.syncWithRetry(ctx, false, forQuerying) 148 if err != nil { 149 return 150 } 151 152 level.Debug(logger).Log("msg", "finished syncing files") 153 154 return 155 } 156 157 // Close Closes references to all the index. 158 func (t *indexSet) Close() { 159 // stop the initialization if it is still ongoing. 160 t.cancelFunc() 161 162 err := t.indexMtx.lock(context.Background()) 163 if err != nil { 164 level.Error(t.logger).Log("msg", "failed to acquire lock for closing index", "err", err) 165 return 166 } 167 defer t.indexMtx.unlock() 168 169 for name, db := range t.index { 170 if err := db.Close(); err != nil { 171 level.Error(t.logger).Log("msg", fmt.Sprintf("failed to close file %s", name), "err", err) 172 } 173 } 174 175 t.index = map[string]index.Index{} 176 } 177 178 func (t *indexSet) ForEach(ctx context.Context, callback index.ForEachIndexCallback) error { 179 if err := t.indexMtx.rLock(ctx); err != nil { 180 return err 181 } 182 defer t.indexMtx.rUnlock() 183 184 logger := util_log.WithContext(ctx, t.logger) 185 level.Debug(logger).Log("index-files-count", len(t.index)) 186 187 for _, idx := range t.index { 188 if err := callback(t.userID == "", idx); err != nil { 189 return err 190 } 191 } 192 193 return nil 194 } 195 196 // DropAllDBs closes reference to all the open index and removes the local files. 197 func (t *indexSet) DropAllDBs() error { 198 err := t.indexMtx.lock(context.Background()) 199 if err != nil { 200 return err 201 } 202 defer t.indexMtx.unlock() 203 204 for fileName := range t.index { 205 if err := t.cleanupDB(fileName); err != nil { 206 return err 207 } 208 } 209 210 return os.RemoveAll(t.cacheLocation) 211 } 212 213 // Err returns the err which is usually set when there was any issue in Init. 214 func (t *indexSet) Err() error { 215 return t.err 216 } 217 218 // LastUsedAt returns the time at which table was last used for querying. 219 func (t *indexSet) LastUsedAt() time.Time { 220 return t.lastUsedAt 221 } 222 223 func (t *indexSet) UpdateLastUsedAt() { 224 t.lastUsedAt = time.Now() 225 } 226 227 // cleanupDB closes and removes the local file. 228 func (t *indexSet) cleanupDB(fileName string) error { 229 df, ok := t.index[fileName] 230 if !ok { 231 return fmt.Errorf("file %s not found in files collection for cleaning up", fileName) 232 } 233 234 filePath := df.Path() 235 236 if err := df.Close(); err != nil { 237 return err 238 } 239 240 delete(t.index, fileName) 241 242 return os.Remove(filePath) 243 } 244 245 func (t *indexSet) Sync(ctx context.Context) (err error) { 246 return t.syncWithRetry(ctx, true, false) 247 } 248 249 // syncWithRetry runs a sync with upto maxSyncRetries on failure 250 func (t *indexSet) syncWithRetry(ctx context.Context, lock, bypassListCache bool) error { 251 var err error 252 for i := 0; i <= maxSyncRetries; i++ { 253 err = t.sync(ctx, lock, bypassListCache) 254 if err == nil { 255 return nil 256 } 257 258 if errors.Is(err, errIndexListCacheTooStale) && i < maxSyncRetries { 259 level.Info(t.logger).Log("msg", "we have hit stale list cache, refreshing it before retrying") 260 t.baseIndexSet.RefreshIndexListCache(ctx) 261 } 262 263 level.Error(t.logger).Log("msg", "sync failed, retrying it", "err", err) 264 } 265 266 return err 267 } 268 269 // sync downloads updated and new files from the storage relevant for the table and removes the deleted ones 270 func (t *indexSet) sync(ctx context.Context, lock, bypassListCache bool) (err error) { 271 level.Debug(t.logger).Log("msg", fmt.Sprintf("syncing files for table %s", t.tableName)) 272 273 toDownload, toDelete, err := t.checkStorageForUpdates(ctx, lock, bypassListCache) 274 if err != nil { 275 return err 276 } 277 278 level.Debug(t.logger).Log("msg", fmt.Sprintf("updates for table %s. toDownload: %s, toDelete: %s", t.tableName, toDownload, toDelete)) 279 280 downloadedFiles, err := t.doConcurrentDownload(ctx, toDownload) 281 if err != nil { 282 return err 283 } 284 285 // if we did not bypass list cache and skipped downloading all the new files due to them being removed by compaction, 286 // it means the cache is not valid anymore since compaction would have happened after last index list cache refresh. 287 // Let us return error to ask the caller to re-run the sync after the list cache refresh. 288 if !bypassListCache && len(downloadedFiles) == 0 && len(toDownload) > 0 { 289 level.Error(t.logger).Log("msg", "we skipped downloading all the new files, possibly removed by compaction", "files", toDownload) 290 return errIndexListCacheTooStale 291 } 292 293 if lock { 294 err = t.indexMtx.lock(ctx) 295 if err != nil { 296 return err 297 } 298 defer t.indexMtx.unlock() 299 } 300 301 for _, fileName := range downloadedFiles { 302 filePath := filepath.Join(t.cacheLocation, fileName) 303 idx, err := t.openIndexFileFunc(filePath) 304 if err != nil { 305 return err 306 } 307 308 t.index[fileName] = idx 309 } 310 311 for _, db := range toDelete { 312 err := t.cleanupDB(db) 313 if err != nil { 314 return err 315 } 316 } 317 318 return nil 319 } 320 321 // checkStorageForUpdates compares files from cache with storage and builds the list of files to be downloaded from storage and to be deleted from cache 322 func (t *indexSet) checkStorageForUpdates(ctx context.Context, lock, bypassListCache bool) (toDownload []storage.IndexFile, toDelete []string, err error) { 323 // listing tables from store 324 var files []storage.IndexFile 325 326 files, err = t.baseIndexSet.ListFiles(ctx, t.tableName, t.userID, bypassListCache) 327 if err != nil { 328 return 329 } 330 331 listedDBs := make(map[string]struct{}, len(files)) 332 333 if lock { 334 err = t.indexMtx.rLock(ctx) 335 if err != nil { 336 return nil, nil, err 337 } 338 defer t.indexMtx.rUnlock() 339 } 340 341 for _, file := range files { 342 normalized := strings.TrimSuffix(file.Name, gzipExtension) 343 listedDBs[normalized] = struct{}{} 344 345 // Checking whether file was already downloaded, if not, download it. 346 // We do not ever upload files in the object store with the same name but different contents so we do not consider downloading modified files again. 347 _, ok := t.index[normalized] 348 if !ok { 349 toDownload = append(toDownload, file) 350 } 351 } 352 353 for db := range t.index { 354 if _, isOK := listedDBs[db]; !isOK { 355 toDelete = append(toDelete, db) 356 } 357 } 358 359 return 360 } 361 362 func (t *indexSet) AwaitReady(ctx context.Context) error { 363 return t.indexMtx.awaitReady(ctx) 364 } 365 366 func (t *indexSet) downloadFileFromStorage(ctx context.Context, fileName, folderPathForTable string) (string, error) { 367 decompress := storage.IsCompressedFile(fileName) 368 dst := filepath.Join(folderPathForTable, fileName) 369 if decompress { 370 dst = strings.Trim(dst, gzipExtension) 371 } 372 return filepath.Base(dst), storage.DownloadFileFromStorage( 373 dst, 374 decompress, 375 true, 376 storage.LoggerWithFilename(t.logger, fileName), 377 func() (io.ReadCloser, error) { 378 return t.baseIndexSet.GetFile(ctx, t.tableName, t.userID, fileName) 379 }, 380 ) 381 } 382 383 // doConcurrentDownload downloads objects(files) concurrently. It ignores only missing file errors caused by removal of file by compaction. 384 // It returns the names of the files downloaded successfully and leaves it upto the caller to open those files. 385 func (t *indexSet) doConcurrentDownload(ctx context.Context, files []storage.IndexFile) ([]string, error) { 386 downloadedFiles := make([]string, 0, len(files)) 387 downloadedFilesMtx := sync.Mutex{} 388 389 err := concurrency.ForEachJob(ctx, len(files), maxDownloadConcurrency, func(ctx context.Context, idx int) error { 390 fileName, err := t.downloadFileFromStorage(ctx, files[idx].Name, t.cacheLocation) 391 if err != nil { 392 if t.baseIndexSet.IsFileNotFoundErr(err) { 393 level.Info(t.logger).Log("msg", fmt.Sprintf("ignoring missing file %s, possibly removed during compaction", fileName)) 394 return nil 395 } 396 return err 397 } 398 399 downloadedFilesMtx.Lock() 400 downloadedFiles = append(downloadedFiles, fileName) 401 downloadedFilesMtx.Unlock() 402 403 return nil 404 }) 405 if err != nil { 406 return nil, err 407 } 408 409 return downloadedFiles, nil 410 }