github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/index/table.go (about)

     1  package index
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"os"
     8  	"path"
     9  	"path/filepath"
    10  	"regexp"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/go-kit/log/level"
    16  	"github.com/grafana/dskit/tenant"
    17  	"go.etcd.io/bbolt"
    18  
    19  	"github.com/grafana/loki/pkg/storage/chunk/client/local"
    20  	chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util"
    21  	"github.com/grafana/loki/pkg/storage/stores/series/index"
    22  	"github.com/grafana/loki/pkg/storage/stores/shipper/index/indexfile"
    23  	shipper_util "github.com/grafana/loki/pkg/storage/stores/shipper/util"
    24  	util_log "github.com/grafana/loki/pkg/util/log"
    25  )
    26  
const (
	// ShardDBsByDuration is the width of the time window used for sharding dbs:
	// a new db is created per window, based on when the write request is received.
	ShardDBsByDuration = 15 * time.Minute

	// snapshotFileSuffix is appended to the name of a db to build the name of its snapshot file.
	// Snapshot files are created periodically and are used for read operations.
	snapshotFileSuffix = ".snapshot"
)
    34  
// BoltDBIndexClient abstracts the two boltdb operations declared below:
// running an index query through an already positioned bbolt cursor, and
// writing a batch of table writes into a named bucket of a db.
type BoltDBIndexClient interface {
	// QueryWithCursor runs query using cursor c and feeds the results to callback.
	QueryWithCursor(_ context.Context, c *bbolt.Cursor, query index.Query, callback index.QueryPagesCallback) error
	// WriteToDB writes the given writes into the bucket bucketName of db.
	WriteToDB(ctx context.Context, db *bbolt.DB, bucketName []byte, writes local.TableWrites) error
}
    39  
// dbSnapshot is the copy of a db produced by Table.Snapshot, together with
// the information needed to tell whether the copy is stale.
type dbSnapshot struct {
	// boltdb is the handle opened on the snapshot file.
	boltdb      *bbolt.DB
	// writesCount is the write counter of the source db (its TxStats.Write)
	// observed when the snapshot was taken; a differing value means the
	// snapshot must be rebuilt.
	writesCount int
}
    44  
// Table is a collection of multiple index files created for a same table by the ingester.
// It is used on the write path for writing the index.
// All the public methods are concurrency safe and take care of mutexes to avoid any data race.
type Table struct {
	// name is the table name; it is set to the base name of path.
	name                 string
	// path is the directory holding the db files and their snapshot files.
	path                 string
	// uploader is used to build the file name under which a db is handed over to the shipper.
	uploader             string
	// indexShipper receives the dbs that are handed over for uploading.
	indexShipper         Shipper
	// makePerTenantBuckets makes writes go to a bucket named after the tenant ID
	// found in the request context instead of the default index bucket.
	makePerTenantBuckets bool

	// dbs holds the open dbs keyed by their file name; guarded by dbsMtx.
	dbs    map[string]*bbolt.DB
	dbsMtx sync.RWMutex

	// dbSnapshots holds the snapshots keyed by the name of the db they were
	// taken from; guarded by dbSnapshotsMtx.
	dbSnapshots    map[string]*dbSnapshot
	dbSnapshotsMtx sync.RWMutex

	// modifyShardsSince is the unix time (seconds) at which the Table was created.
	// Writes never go to a shard older than this.
	modifyShardsSince int64
}
    63  
    64  // NewTable create a new Table without looking for any existing local dbs belonging to the table.
    65  func NewTable(path, uploader string, indexShipper Shipper, makePerTenantBuckets bool) (*Table, error) {
    66  	err := chunk_util.EnsureDirectory(path)
    67  	if err != nil {
    68  		return nil, err
    69  	}
    70  
    71  	return newTableWithDBs(map[string]*bbolt.DB{}, path, uploader, indexShipper, makePerTenantBuckets)
    72  }
    73  
    74  // LoadTable loads local dbs belonging to the table and creates a new Table with references to dbs if there are any otherwise it doesn't create a table
    75  func LoadTable(path, uploader string, indexShipper Shipper, makePerTenantBuckets bool, metrics *metrics) (*Table, error) {
    76  	dbs, err := loadBoltDBsFromDir(path, metrics)
    77  	if err != nil {
    78  		return nil, err
    79  	}
    80  
    81  	if len(dbs) == 0 {
    82  		return nil, nil
    83  	}
    84  
    85  	return newTableWithDBs(dbs, path, uploader, indexShipper, makePerTenantBuckets)
    86  }
    87  
    88  func newTableWithDBs(dbs map[string]*bbolt.DB, path, uploader string, indexShipper Shipper, makePerTenantBuckets bool) (*Table, error) {
    89  	return &Table{
    90  		name:                 filepath.Base(path),
    91  		path:                 path,
    92  		uploader:             uploader,
    93  		indexShipper:         indexShipper,
    94  		dbs:                  dbs,
    95  		dbSnapshots:          map[string]*dbSnapshot{},
    96  		modifyShardsSince:    time.Now().Unix(),
    97  		makePerTenantBuckets: makePerTenantBuckets,
    98  	}, nil
    99  }
   100  
   101  func (lt *Table) Snapshot() error {
   102  	lt.dbsMtx.RLock()
   103  	defer lt.dbsMtx.RUnlock()
   104  
   105  	lt.dbSnapshotsMtx.Lock()
   106  	defer lt.dbSnapshotsMtx.Unlock()
   107  
   108  	level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("snapshotting table %s", lt.name))
   109  
   110  	for name, db := range lt.dbs {
   111  		level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("checking db %s for snapshot", name))
   112  		srcWriteCount := 0
   113  		err := db.View(func(tx *bbolt.Tx) error {
   114  			srcWriteCount = db.Stats().TxStats.Write
   115  			return nil
   116  		})
   117  		if err != nil {
   118  			return err
   119  		}
   120  
   121  		snapshot, ok := lt.dbSnapshots[name]
   122  		filePath := path.Join(lt.path, fmt.Sprintf("%s%s", name, snapshotFileSuffix))
   123  
   124  		if !ok {
   125  			snapshot = &dbSnapshot{}
   126  		} else if snapshot.writesCount == srcWriteCount {
   127  			continue
   128  		} else {
   129  			if err := snapshot.boltdb.Close(); err != nil {
   130  				return err
   131  			}
   132  
   133  			if err := os.Remove(filePath); err != nil {
   134  				return err
   135  			}
   136  		}
   137  
   138  		f, err := os.Create(filePath)
   139  		if err != nil {
   140  			return err
   141  		}
   142  
   143  		err = db.View(func(tx *bbolt.Tx) (err error) {
   144  			_, err = tx.WriteTo(f)
   145  			return
   146  		})
   147  		if err != nil {
   148  			return err
   149  		}
   150  
   151  		// flush the file to disk.
   152  		if err := f.Sync(); err != nil {
   153  			return err
   154  		}
   155  
   156  		if err := f.Close(); err != nil {
   157  			return err
   158  		}
   159  
   160  		snapshot.boltdb, err = shipper_util.SafeOpenBoltdbFile(filePath)
   161  		if err != nil {
   162  			return err
   163  		}
   164  
   165  		snapshot.writesCount = srcWriteCount
   166  		lt.dbSnapshots[name] = snapshot
   167  
   168  		level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("finished snaphotting db %s", name))
   169  	}
   170  
   171  	level.Debug(util_log.Logger).Log("msg", fmt.Sprintf("finished snapshotting table %s", lt.name))
   172  
   173  	return nil
   174  }
   175  
   176  func (lt *Table) ForEach(_ context.Context, callback func(boltdb *bbolt.DB) error) error {
   177  	lt.dbSnapshotsMtx.RLock()
   178  	defer lt.dbSnapshotsMtx.RUnlock()
   179  
   180  	for _, db := range lt.dbSnapshots {
   181  		if err := callback(db.boltdb); err != nil {
   182  			return err
   183  		}
   184  	}
   185  
   186  	return nil
   187  }
   188  
   189  func (lt *Table) getOrAddDB(name string) (*bbolt.DB, error) {
   190  	lt.dbsMtx.RLock()
   191  	db, ok := lt.dbs[name]
   192  	lt.dbsMtx.RUnlock()
   193  
   194  	if ok {
   195  		return db, nil
   196  	}
   197  
   198  	lt.dbsMtx.Lock()
   199  	defer lt.dbsMtx.Unlock()
   200  
   201  	db, ok = lt.dbs[name]
   202  	if ok {
   203  		return db, nil
   204  	}
   205  
   206  	var err error
   207  	db, err = shipper_util.SafeOpenBoltdbFile(filepath.Join(lt.path, name))
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  
   212  	lt.dbs[name] = db
   213  
   214  	return db, nil
   215  }
   216  
   217  // Write writes to a db locally with write time set to now.
   218  func (lt *Table) Write(ctx context.Context, writes local.TableWrites) error {
   219  	return lt.write(ctx, time.Now(), writes)
   220  }
   221  
   222  // write writes to a db locally. It shards the db files by truncating the passed time by ShardDBsByDuration using https://golang.org/pkg/time/#Time.Truncate
   223  // db files are named after the time shard i.e epoch of the truncated time.
   224  // If a db file does not exist for a shard it gets created.
   225  func (lt *Table) write(ctx context.Context, tm time.Time, writes local.TableWrites) error {
   226  	writeToBucket := local.IndexBucketName
   227  	if lt.makePerTenantBuckets {
   228  		userID, err := tenant.TenantID(ctx)
   229  		if err != nil {
   230  			return err
   231  		}
   232  
   233  		writeToBucket = []byte(userID)
   234  	}
   235  
   236  	// do not write to files older than init time otherwise we might endup modifying file which was already created and uploaded before last shutdown.
   237  	shard := tm.Truncate(ShardDBsByDuration).Unix()
   238  	if shard < lt.modifyShardsSince {
   239  		shard = lt.modifyShardsSince
   240  	}
   241  
   242  	db, err := lt.getOrAddDB(fmt.Sprint(shard))
   243  	if err != nil {
   244  		return err
   245  	}
   246  
   247  	return local.WriteToDB(ctx, db, writeToBucket, writes)
   248  }
   249  
   250  // Stop closes all the open dbs.
   251  func (lt *Table) Stop() {
   252  	lt.dbsMtx.Lock()
   253  	defer lt.dbsMtx.Unlock()
   254  
   255  	for name, db := range lt.dbs {
   256  		if err := db.Close(); err != nil {
   257  			level.Error(util_log.Logger).Log("msg", fmt.Errorf("failed to close file %s for table %s", name, lt.name))
   258  		}
   259  	}
   260  
   261  	lt.dbs = map[string]*bbolt.DB{}
   262  }
   263  
   264  func (lt *Table) removeSnapshotDB(name string) error {
   265  	lt.dbSnapshotsMtx.Lock()
   266  	defer lt.dbSnapshotsMtx.Unlock()
   267  
   268  	db, ok := lt.dbSnapshots[name]
   269  	if !ok {
   270  		return nil
   271  	}
   272  
   273  	err := db.boltdb.Close()
   274  	if err != nil {
   275  		return err
   276  	}
   277  
   278  	delete(lt.dbSnapshots, name)
   279  
   280  	return os.Remove(filepath.Join(lt.path, fmt.Sprintf("%s%s", name, snapshotFileSuffix)))
   281  }
   282  
   283  // HandoverIndexesToShipper hands over the inactive dbs to shipper for uploading
   284  func (lt *Table) HandoverIndexesToShipper(force bool) error {
   285  	indexesHandedOverToShipper, err := lt.handoverIndexesToShipper(force)
   286  	if err != nil {
   287  		return err
   288  	}
   289  
   290  	lt.dbsMtx.Lock()
   291  	defer lt.dbsMtx.Unlock()
   292  
   293  	for _, name := range indexesHandedOverToShipper {
   294  		delete(lt.dbs, name)
   295  		if err := lt.removeSnapshotDB(name); err != nil {
   296  			level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to remove snapshot db %s", name))
   297  		}
   298  	}
   299  
   300  	return nil
   301  }
   302  
   303  func (lt *Table) handoverIndexesToShipper(force bool) ([]string, error) {
   304  	lt.dbsMtx.RLock()
   305  	defer lt.dbsMtx.RUnlock()
   306  
   307  	handoverShardsBefore := fmt.Sprint(getOldestActiveShardTime().Unix())
   308  
   309  	// Adding check for considering only files which are sharded and have just an epoch in their name.
   310  	// Before introducing sharding we had a single file per table which were moved inside the folder per table as part of migration.
   311  	// The files were named with <table_prefix><period>.
   312  	// Since sharding was introduced we have a new file every 15 mins and their names just include an epoch timestamp, for e.g `1597927538`.
   313  	// We can remove this check after we no longer support upgrading from 1.5.0.
   314  	filenameWithEpochRe, err := regexp.Compile(`^[0-9]{10}$`)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  
   319  	level.Info(util_log.Logger).Log("msg", fmt.Sprintf("handing over indexes to shipper %s", lt.name))
   320  
   321  	var indexesHandedOverToShipper []string
   322  	for name, db := range lt.dbs {
   323  		// doing string comparison between unix timestamps in string form since they are anyways of same length
   324  		if !force && filenameWithEpochRe.MatchString(name) && name >= handoverShardsBefore {
   325  			continue
   326  		}
   327  
   328  		err = lt.indexShipper.AddIndex(lt.name, "", indexfile.BoltDBToIndexFile(db, lt.buildFileName(name)))
   329  		if err != nil {
   330  			return nil, err
   331  		}
   332  		indexesHandedOverToShipper = append(indexesHandedOverToShipper, name)
   333  	}
   334  
   335  	level.Info(util_log.Logger).Log("msg", fmt.Sprintf("finished handing over table %s", lt.name))
   336  
   337  	return indexesHandedOverToShipper, nil
   338  }
   339  
   340  func (lt *Table) buildFileName(dbName string) string {
   341  	// Files are stored with <uploader>-<db-name>
   342  	fileName := fmt.Sprintf("%s-%s", lt.uploader, dbName)
   343  
   344  	// if the file is a migrated one then don't add its name to the object key otherwise we would re-upload them again here with a different name.
   345  	if lt.name == dbName {
   346  		fileName = lt.uploader
   347  	}
   348  
   349  	return fileName
   350  }
   351  
   352  func loadBoltDBsFromDir(dir string, metrics *metrics) (map[string]*bbolt.DB, error) {
   353  	dbs := map[string]*bbolt.DB{}
   354  	filesInfo, err := ioutil.ReadDir(dir)
   355  	if err != nil {
   356  		return nil, err
   357  	}
   358  
   359  	for _, fileInfo := range filesInfo {
   360  		if fileInfo.IsDir() {
   361  			continue
   362  		}
   363  		fullPath := filepath.Join(dir, fileInfo.Name())
   364  
   365  		if strings.HasSuffix(fileInfo.Name(), indexfile.TempFileSuffix) || strings.HasSuffix(fileInfo.Name(), snapshotFileSuffix) {
   366  			// If an ingester is killed abruptly in the middle of an upload operation it could leave out a temp file which holds the snapshot of db for uploading.
   367  			// Cleaning up those temp files to avoid problems.
   368  			if err := os.Remove(fullPath); err != nil {
   369  				level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to remove temp file %s", fullPath), "err", err)
   370  			}
   371  			continue
   372  		}
   373  
   374  		db, err := shipper_util.SafeOpenBoltdbFile(fullPath)
   375  		if err != nil {
   376  			level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to open file %s. Please fix or remove this file.", fullPath), "err", err)
   377  			metrics.openExistingFileFailuresTotal.Inc()
   378  			continue
   379  		}
   380  
   381  		hasBucket := false
   382  		_ = db.View(func(tx *bbolt.Tx) error {
   383  			return tx.ForEach(func(_ []byte, _ *bbolt.Bucket) error {
   384  				hasBucket = true
   385  				return nil
   386  			})
   387  		})
   388  
   389  		if !hasBucket {
   390  			level.Info(util_log.Logger).Log("msg", fmt.Sprintf("file %s has no buckets, so removing it", fullPath))
   391  			_ = db.Close()
   392  			if err := os.Remove(fullPath); err != nil {
   393  				level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to remove file %s without any buckets", fullPath), "err", err)
   394  			}
   395  			continue
   396  		}
   397  
   398  		dbs[fileInfo.Name()] = db
   399  	}
   400  
   401  	return dbs, nil
   402  }
   403  
   404  // getOldestActiveShardTime returns the time of oldest active shard with a buffer of 1 minute.
   405  func getOldestActiveShardTime() time.Time {
   406  	// upload files excluding active shard. It could so happen that we just started a new shard but the file for last shard is still being updated due to pending writes or pending flush to disk.
   407  	// To avoid uploading it, excluding previous active shard as well if it has been not more than a minute since it became inactive.
   408  	return time.Now().Add(-time.Minute).Truncate(ShardDBsByDuration)
   409  }