github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/db.go (about)

     1  package storage
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"runtime/debug"
     8  
     9  	"github.com/dgraph-io/badger/v2"
    10  	"github.com/dgraph-io/badger/v2/options"
    11  	"github.com/prometheus/client_golang/prometheus"
    12  	"github.com/sirupsen/logrus"
    13  
    14  	"github.com/pyroscope-io/pyroscope/pkg/storage/cache"
    15  	"github.com/pyroscope-io/pyroscope/pkg/util/bytesize"
    16  )
    17  
// db bundles a badger database with an optional cache and the bookkeeping
// used by the periodic value log garbage collection task.
type db struct {
	// name is the database name passed to newBadger; it is returned by Name.
	name   string
	// logger is the storage logger annotated with the "db" field.
	logger logrus.FieldLogger

	// DB is the underlying badger instance; its methods are promoted to db.
	*badger.DB
	// Cache is only set when newBadger is given a non-nil codec,
	// otherwise it stays nil.
	*cache.Cache

	// lastGC is the on-disk size measured right after the most recent
	// garbage collection run; zero means GC has not run yet.
	lastGC  bytesize.ByteSize
	// gcCount is incremented after every garbage collection run. It is only
	// initialised for on-disk databases (in-memory ones leave it nil).
	gcCount prometheus.Counter
}
    28  
    29  type Prefix string
    30  
    31  const (
    32  	segmentPrefix    Prefix = "s:"
    33  	treePrefix       Prefix = "t:"
    34  	dictionaryPrefix Prefix = "d:"
    35  	dimensionPrefix  Prefix = "i:"
    36  )
    37  
    38  func (p Prefix) String() string      { return string(p) }
    39  func (p Prefix) bytes() []byte       { return []byte(p) }
    40  func (p Prefix) key(k string) []byte { return []byte(string(p) + k) }
    41  
    42  func (p Prefix) trim(k []byte) ([]byte, bool) {
    43  	if len(k) > len(p) {
    44  		return k[len(p):], true
    45  	}
    46  	return nil, false
    47  }
    48  
    49  func (s *Storage) newBadger(name string, p Prefix, codec cache.Codec) (BadgerDBWithCache, error) {
    50  	var d *db
    51  	var err error
    52  	logger := logrus.New()
    53  	logger.SetLevel(s.config.badgerLogLevel)
    54  
    55  	if s.config.inMemory {
    56  		badgerDB, err := badger.Open(badger.DefaultOptions("").
    57  			WithInMemory(true).
    58  			WithLogger(logger.WithField("badger", name)))
    59  		if err != nil {
    60  			return nil, err
    61  		}
    62  
    63  		d = &db{
    64  			name:   name,
    65  			DB:     badgerDB,
    66  			logger: s.logger.WithField("db", name),
    67  		}
    68  
    69  		if codec != nil {
    70  			d.Cache = cache.New(cache.Config{
    71  				DB:      badgerDB,
    72  				Metrics: s.metrics.createCacheMetrics(name),
    73  				TTL:     s.cacheTTL,
    74  				Prefix:  p.String(),
    75  				Codec:   codec,
    76  			})
    77  		}
    78  		return d, nil
    79  	}
    80  
    81  	badgerPath := filepath.Join(s.config.badgerBasePath, name)
    82  	if err = os.MkdirAll(badgerPath, 0o755); err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	defer func() {
    87  		if r := recover(); r != nil {
    88  			// BadgerDB may panic because of file system access permissions. In particular,
    89  			// if is running in kubernetes with incorrect/unset fsGroup security context:
    90  			// https://github.com/pyroscope-io/pyroscope/issues/350.
    91  			err = fmt.Errorf("failed to open database\n\n"+
    92  				"Please make sure Pyroscope Server has write access permissions to %s directory.\n\n"+
    93  				"Recovered from panic: %v\n%v", badgerPath, r, string(debug.Stack()))
    94  		}
    95  	}()
    96  
    97  	badgerDB, err := badger.Open(badger.DefaultOptions(badgerPath).
    98  		WithTruncate(!s.config.badgerNoTruncate).
    99  		WithSyncWrites(false).
   100  		WithCompactL0OnClose(false).
   101  		WithCompression(options.ZSTD).
   102  		WithLogger(logger.WithField("badger", name)))
   103  
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	d = &db{
   109  		name:    name,
   110  		DB:      badgerDB,
   111  		logger:  s.logger.WithField("db", name),
   112  		gcCount: s.metrics.gcCount.WithLabelValues(name),
   113  	}
   114  
   115  	if codec != nil {
   116  		d.Cache = cache.New(cache.Config{
   117  			DB:      badgerDB,
   118  			Metrics: s.metrics.createCacheMetrics(name),
   119  			TTL:     s.cacheTTL,
   120  			Prefix:  p.String(),
   121  			Codec:   codec,
   122  		})
   123  	}
   124  
   125  	s.maintenanceTask(s.badgerGCTaskInterval, func() {
   126  		diff := calculateDBSize(badgerPath) - d.lastGC
   127  		if d.lastGC == 0 || s.gcSizeDiff == 0 || diff > s.gcSizeDiff {
   128  			d.runGC(0.7)
   129  			d.gcCount.Inc()
   130  			d.lastGC = calculateDBSize(badgerPath)
   131  		}
   132  	})
   133  
   134  	return d, nil
   135  }
   136  
   137  func (d *db) Size() bytesize.ByteSize {
   138  	// The value is updated once per minute.
   139  	lsm, vlog := d.DB.Size()
   140  	return bytesize.ByteSize(lsm + vlog)
   141  }
   142  
   143  func (d *db) CacheSize() uint64 {
   144  	return d.Cache.Size()
   145  }
   146  
   147  func (d *db) Name() string {
   148  	return d.name
   149  }
   150  
   151  func (d *db) DBInstance() *badger.DB {
   152  	return d.DB
   153  }
   154  func (d *db) CacheInstance() *cache.Cache {
   155  	return d.Cache
   156  }
   157  
   158  func (d *db) runGC(discardRatio float64) (reclaimed bool) {
   159  	d.logger.Debug("starting badger garbage collection")
   160  	for {
   161  		switch err := d.RunValueLogGC(discardRatio); err {
   162  		default:
   163  			d.logger.WithError(err).Warn("failed to run GC")
   164  			return false
   165  		case badger.ErrNoRewrite:
   166  			return reclaimed
   167  		case nil:
   168  			reclaimed = true
   169  			continue
   170  		}
   171  	}
   172  }
   173  
   174  // TODO(kolesnikovae): filepath.Walk is notoriously slow.
   175  //  Consider use of https://github.com/karrick/godirwalk.
   176  //  Although, every badger.DB calculates its size (reported
   177  //  via Size) in the same way every minute.
   178  func calculateDBSize(path string) bytesize.ByteSize {
   179  	var size int64
   180  	_ = filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
   181  		if err != nil {
   182  			return err
   183  		}
   184  		switch filepath.Ext(path) {
   185  		case ".sst", ".vlog":
   186  			size += info.Size()
   187  		}
   188  		return nil
   189  	})
   190  	return bytesize.ByteSize(size)
   191  }