github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/index/table.go (about) 1 package index 2 3 import ( 4 "context" 5 "fmt" 6 "io/ioutil" 7 "os" 8 "path" 9 "path/filepath" 10 "regexp" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/go-kit/log/level" 16 "github.com/grafana/dskit/tenant" 17 "go.etcd.io/bbolt" 18 19 "github.com/grafana/loki/pkg/storage/chunk/client/local" 20 chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util" 21 "github.com/grafana/loki/pkg/storage/stores/series/index" 22 "github.com/grafana/loki/pkg/storage/stores/shipper/index/indexfile" 23 shipper_util "github.com/grafana/loki/pkg/storage/stores/shipper/util" 24 util_log "github.com/grafana/loki/pkg/util/log" 25 ) 26 27 const ( 28 // create a new db sharded by time based on when write request is received 29 ShardDBsByDuration = 15 * time.Minute 30 31 // a snapshot file is created with name of the db + snapshotFileSuffix periodically for read operation. 32 snapshotFileSuffix = ".snapshot" 33 ) 34 35 type BoltDBIndexClient interface { 36 QueryWithCursor(_ context.Context, c *bbolt.Cursor, query index.Query, callback index.QueryPagesCallback) error 37 WriteToDB(ctx context.Context, db *bbolt.DB, bucketName []byte, writes local.TableWrites) error 38 } 39 40 type dbSnapshot struct { 41 boltdb *bbolt.DB 42 writesCount int 43 } 44 45 // Table is a collection of multiple index files created for a same table by the ingester. 46 // It is used on the write path for writing the index. 47 // All the public methods are concurrency safe and take care of mutexes to avoid any data race. 48 type Table struct { 49 name string 50 path string 51 uploader string 52 indexShipper Shipper 53 makePerTenantBuckets bool 54 55 dbs map[string]*bbolt.DB 56 dbsMtx sync.RWMutex 57 58 dbSnapshots map[string]*dbSnapshot 59 dbSnapshotsMtx sync.RWMutex 60 61 modifyShardsSince int64 62 } 63 64 // NewTable create a new Table without looking for any existing local dbs belonging to the table. 65 func NewTable(path, uploader string, indexShipper Shipper, makePerTenantBuckets bool) (*Table, error) { 66 err := chunk_util.EnsureDirectory(path) 67 if err != nil { 68 return nil, err 69 } 70 71 return newTableWithDBs(map[string]*bbolt.DB{}, path, uploader, indexShipper, makePerTenantBuckets) 72 } 73 74 // LoadTable loads local dbs belonging to the table and creates a new Table with references to dbs if there are any otherwise it doesn't create a table 75 func LoadTable(path, uploader string, indexShipper Shipper, makePerTenantBuckets bool, metrics *metrics) (*Table, error) { 76 dbs, err := loadBoltDBsFromDir(path, metrics) 77 if err != nil { 78 return nil, err 79 } 80 81 if len(dbs) == 0 { 82 return nil, nil 83 } 84 85 return newTableWithDBs(dbs, path, uploader, indexShipper, makePerTenantBuckets) 86 } 87 88 func newTableWithDBs(dbs map[string]*bbolt.DB, path, uploader string, indexShipper Shipper, makePerTenantBuckets bool) (*Table, error) { 89 return &Table{ 90 name: filepath.Base(path), 91 path: path, 92 uploader: uploader, 93 indexShipper: indexShipper, 94 dbs: dbs, 95 dbSnapshots: map[string]*dbSnapshot{}, 96 modifyShardsSince: time.Now().Unix(), 97 makePerTenantBuckets: makePerTenantBuckets, 98 }, nil 99 } 100 101 func (lt *Table) Snapshot() error { 102 lt.dbsMtx.RLock() 103 defer lt.dbsMtx.RUnlock() 104 105 lt.dbSnapshotsMtx.Lock() 106 defer lt.dbSnapshotsMtx.Unlock() 107 108 level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("snapshotting table %s", lt.name)) 109 110 for name, db := range lt.dbs { 111 level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("checking db %s for snapshot", name)) 112 srcWriteCount := 0 113 err := db.View(func(tx *bbolt.Tx) error { 114 srcWriteCount = db.Stats().TxStats.Write 115 return nil 116 }) 117 if err != nil { 118 return err 119 } 120 121 snapshot, ok := lt.dbSnapshots[name] 122 filePath := path.Join(lt.path, fmt.Sprintf("%s%s", name, snapshotFileSuffix)) 123 124 if !ok { 125 snapshot = &dbSnapshot{} 126 } else if snapshot.writesCount == srcWriteCount { 127 continue 128 } else { 129 if err := snapshot.boltdb.Close(); err != nil { 130 return err 131 } 132 133 if err := os.Remove(filePath); err != nil { 134 return err 135 } 136 } 137 138 f, err := os.Create(filePath) 139 if err != nil { 140 return err 141 } 142 143 err = db.View(func(tx *bbolt.Tx) (err error) { 144 _, err = tx.WriteTo(f) 145 return 146 }) 147 if err != nil { 148 return err 149 } 150 151 // flush the file to disk. 152 if err := f.Sync(); err != nil { 153 return err 154 } 155 156 if err := f.Close(); err != nil { 157 return err 158 } 159 160 snapshot.boltdb, err = shipper_util.SafeOpenBoltdbFile(filePath) 161 if err != nil { 162 return err 163 } 164 165 snapshot.writesCount = srcWriteCount 166 lt.dbSnapshots[name] = snapshot 167 168 level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("finished snaphotting db %s", name)) 169 } 170 171 level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("finished snapshotting table %s", lt.name)) 172 173 return nil 174 } 175 176 func (lt *Table) ForEach(_ context.Context, callback func(boltdb *bbolt.DB) error) error { 177 lt.dbSnapshotsMtx.RLock() 178 defer lt.dbSnapshotsMtx.RUnlock() 179 180 for _, db := range lt.dbSnapshots { 181 if err := callback(db.boltdb); err != nil { 182 return err 183 } 184 } 185 186 return nil 187 } 188 189 func (lt *Table) getOrAddDB(name string) (*bbolt.DB, error) { 190 lt.dbsMtx.RLock() 191 db, ok := lt.dbs[name] 192 lt.dbsMtx.RUnlock() 193 194 if ok { 195 return db, nil 196 } 197 198 lt.dbsMtx.Lock() 199 defer lt.dbsMtx.Unlock() 200 201 db, ok = lt.dbs[name] 202 if ok { 203 return db, nil 204 } 205 206 var err error 207 db, err = shipper_util.SafeOpenBoltdbFile(filepath.Join(lt.path, name)) 208 if err != nil { 209 return nil, err 210 } 211 212 lt.dbs[name] = db 213 214 return db, nil 215 } 216 217 // Write writes to a db locally with write time set to now. 218 func (lt *Table) Write(ctx context.Context, writes local.TableWrites) error { 219 return lt.write(ctx, time.Now(), writes) 220 } 221 222 // write writes to a db locally. It shards the db files by truncating the passed time by ShardDBsByDuration using https://golang.org/pkg/time/#Time.Truncate 223 // db files are named after the time shard i.e epoch of the truncated time. 224 // If a db file does not exist for a shard it gets created. 225 func (lt *Table) write(ctx context.Context, tm time.Time, writes local.TableWrites) error { 226 writeToBucket := local.IndexBucketName 227 if lt.makePerTenantBuckets { 228 userID, err := tenant.TenantID(ctx) 229 if err != nil { 230 return err 231 } 232 233 writeToBucket = []byte(userID) 234 } 235 236 // do not write to files older than init time otherwise we might endup modifying file which was already created and uploaded before last shutdown. 237 shard := tm.Truncate(ShardDBsByDuration).Unix() 238 if shard < lt.modifyShardsSince { 239 shard = lt.modifyShardsSince 240 } 241 242 db, err := lt.getOrAddDB(fmt.Sprint(shard)) 243 if err != nil { 244 return err 245 } 246 247 return local.WriteToDB(ctx, db, writeToBucket, writes) 248 } 249 250 // Stop closes all the open dbs. 251 func (lt *Table) Stop() { 252 lt.dbsMtx.Lock() 253 defer lt.dbsMtx.Unlock() 254 255 for name, db := range lt.dbs { 256 if err := db.Close(); err != nil { 257 level.Error(util_log.Logger).Log("msg", fmt.Errorf("failed to close file %s for table %s", name, lt.name)) 258 } 259 } 260 261 lt.dbs = map[string]*bbolt.DB{} 262 } 263 264 func (lt *Table) removeSnapshotDB(name string) error { 265 lt.dbSnapshotsMtx.Lock() 266 defer lt.dbSnapshotsMtx.Unlock() 267 268 db, ok := lt.dbSnapshots[name] 269 if !ok { 270 return nil 271 } 272 273 err := db.boltdb.Close() 274 if err != nil { 275 return err 276 } 277 278 delete(lt.dbSnapshots, name) 279 280 return os.Remove(filepath.Join(lt.path, fmt.Sprintf("%s%s", name, snapshotFileSuffix))) 281 } 282 283 // HandoverIndexesToShipper hands over the inactive dbs to shipper for uploading 284 func (lt *Table) HandoverIndexesToShipper(force bool) error { 285 indexesHandedOverToShipper, err := lt.handoverIndexesToShipper(force) 286 if err != nil { 287 return err 288 } 289 290 lt.dbsMtx.Lock() 291 defer lt.dbsMtx.Unlock() 292 293 for _, name := range indexesHandedOverToShipper { 294 delete(lt.dbs, name) 295 if err := lt.removeSnapshotDB(name); err != nil { 296 level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to remove snapshot db %s", name)) 297 } 298 } 299 300 return nil 301 } 302 303 func (lt *Table) handoverIndexesToShipper(force bool) ([]string, error) { 304 lt.dbsMtx.RLock() 305 defer lt.dbsMtx.RUnlock() 306 307 handoverShardsBefore := fmt.Sprint(getOldestActiveShardTime().Unix()) 308 309 // Adding check for considering only files which are sharded and have just an epoch in their name. 310 // Before introducing sharding we had a single file per table which were moved inside the folder per table as part of migration. 311 // The files were named with <table_prefix><period>. 312 // Since sharding was introduced we have a new file every 15 mins and their names just include an epoch timestamp, for e.g `1597927538`. 313 // We can remove this check after we no longer support upgrading from 1.5.0. 314 filenameWithEpochRe, err := regexp.Compile(`^[0-9]{10}$`) 315 if err != nil { 316 return nil, err 317 } 318 319 level.Info(util_log.Logger).Log("msg", fmt.Sprintf("handing over indexes to shipper %s", lt.name)) 320 321 var indexesHandedOverToShipper []string 322 for name, db := range lt.dbs { 323 // doing string comparison between unix timestamps in string form since they are anyways of same length 324 if !force && filenameWithEpochRe.MatchString(name) && name >= handoverShardsBefore { 325 continue 326 } 327 328 err = lt.indexShipper.AddIndex(lt.name, "", indexfile.BoltDBToIndexFile(db, lt.buildFileName(name))) 329 if err != nil { 330 return nil, err 331 } 332 indexesHandedOverToShipper = append(indexesHandedOverToShipper, name) 333 } 334 335 level.Info(util_log.Logger).Log("msg", fmt.Sprintf("finished handing over table %s", lt.name)) 336 337 return indexesHandedOverToShipper, nil 338 } 339 340 func (lt *Table) buildFileName(dbName string) string { 341 // Files are stored with <uploader>-<db-name> 342 fileName := fmt.Sprintf("%s-%s", lt.uploader, dbName) 343 344 // if the file is a migrated one then don't add its name to the object key otherwise we would re-upload them again here with a different name. 345 if lt.name == dbName { 346 fileName = lt.uploader 347 } 348 349 return fileName 350 } 351 352 func loadBoltDBsFromDir(dir string, metrics *metrics) (map[string]*bbolt.DB, error) { 353 dbs := map[string]*bbolt.DB{} 354 filesInfo, err := ioutil.ReadDir(dir) 355 if err != nil { 356 return nil, err 357 } 358 359 for _, fileInfo := range filesInfo { 360 if fileInfo.IsDir() { 361 continue 362 } 363 fullPath := filepath.Join(dir, fileInfo.Name()) 364 365 if strings.HasSuffix(fileInfo.Name(), indexfile.TempFileSuffix) || strings.HasSuffix(fileInfo.Name(), snapshotFileSuffix) { 366 // If an ingester is killed abruptly in the middle of an upload operation it could leave out a temp file which holds the snapshot of db for uploading. 367 // Cleaning up those temp files to avoid problems. 368 if err := os.Remove(fullPath); err != nil { 369 level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to remove temp file %s", fullPath), "err", err) 370 } 371 continue 372 } 373 374 db, err := shipper_util.SafeOpenBoltdbFile(fullPath) 375 if err != nil { 376 level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to open file %s. Please fix or remove this file.", fullPath), "err", err) 377 metrics.openExistingFileFailuresTotal.Inc() 378 continue 379 } 380 381 hasBucket := false 382 _ = db.View(func(tx *bbolt.Tx) error { 383 return tx.ForEach(func(_ []byte, _ *bbolt.Bucket) error { 384 hasBucket = true 385 return nil 386 }) 387 }) 388 389 if !hasBucket { 390 level.Info(util_log.Logger).Log("msg", fmt.Sprintf("file %s has no buckets, so removing it", fullPath)) 391 _ = db.Close() 392 if err := os.Remove(fullPath); err != nil { 393 level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to remove file %s without any buckets", fullPath), "err", err) 394 } 395 continue 396 } 397 398 dbs[fileInfo.Name()] = db 399 } 400 401 return dbs, nil 402 } 403 404 // getOldestActiveShardTime returns the time of oldest active shard with a buffer of 1 minute. 405 func getOldestActiveShardTime() time.Time { 406 // upload files excluding active shard. It could so happen that we just started a new shard but the file for last shard is still being updated due to pending writes or pending flush to disk. 407 // To avoid uploading it, excluding previous active shard as well if it has been not more than a minute since it became inactive. 408 return time.Now().Add(-time.Minute).Truncate(ShardDBsByDuration) 409 }