github.com/koko1123/flow-go-1@v0.29.6/storage/badger/cleaner.go (about)

     1  // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED
     2  
     3  package badger
     4  
     5  import (
     6  	"math/rand"
     7  	"time"
     8  
     9  	"github.com/dgraph-io/badger/v3"
    10  	"github.com/rs/zerolog"
    11  
    12  	"github.com/koko1123/flow-go-1/module"
    13  )
    14  
    15  type Cleaner struct {
    16  	log     zerolog.Logger
    17  	db      *badger.DB
    18  	metrics module.CleanerMetrics
    19  	enabled bool
    20  	ratio   float64
    21  	freq    int
    22  	calls   int
    23  }
    24  
    25  // NewCleaner returns a cleaner that runs the badger value log garbage collection once every `frequency` calls
    26  // if a frequency of zero is passed in, we will not run the GC at all
    27  func NewCleaner(log zerolog.Logger, db *badger.DB, metrics module.CleanerMetrics, frequency int) *Cleaner {
    28  	// NOTE: we run garbage collection frequently at points in our business
    29  	// logic where we are likely to have a small breather in activity; it thus
    30  	// makes sense to run garbage collection often, with a smaller ratio, rather
    31  	// than running it rarely and having big rewrites at once
    32  	c := &Cleaner{
    33  		log:     log.With().Str("component", "cleaner").Logger(),
    34  		db:      db,
    35  		metrics: metrics,
    36  		ratio:   0.2,
    37  		freq:    frequency,
    38  		enabled: frequency > 0, // Disable if passed in 0 as frequency
    39  	}
    40  	// we don't want the entire network to run GC at the same time, so
    41  	// distribute evenly over time
    42  	if c.enabled {
    43  		c.calls = rand.Intn(c.freq)
    44  	}
    45  	return c
    46  }
    47  
    48  func (c *Cleaner) RunGC() {
    49  	if !c.enabled {
    50  		return
    51  	}
    52  	// only actually run approximately every frequency number of calls
    53  	c.calls++
    54  	if c.calls < c.freq {
    55  		return
    56  	}
    57  
    58  	// we add 20% jitter into the interval, so that we don't risk nodes syncing
    59  	// up on their GC calls over time
    60  	c.calls = rand.Intn(c.freq / 5)
    61  
    62  	// run the garbage collection in own goroutine and handle sentinel errors
    63  	go func() {
    64  		started := time.Now()
    65  		err := c.db.RunValueLogGC(c.ratio)
    66  		if err == badger.ErrRejected {
    67  			// NOTE: this happens when a GC call is already running
    68  			c.log.Warn().Msg("garbage collection on value log already running")
    69  			return
    70  		}
    71  		if err == badger.ErrNoRewrite {
    72  			// NOTE: this happens when no files have any garbage to drop
    73  			c.log.Debug().Msg("garbage collection on value log unnecessary")
    74  			return
    75  		}
    76  		if err != nil {
    77  			c.log.Error().Err(err).Msg("garbage collection on value log failed")
    78  			return
    79  		}
    80  
    81  		runtime := time.Since(started)
    82  		c.log.Debug().
    83  			Dur("gc_duration", runtime).
    84  			Msg("garbage collection on value log executed")
    85  		c.metrics.RanGC(runtime)
    86  	}()
    87  }