// github.com/onflow/flow-go@v0.33.17/storage/badger/cleaner.go

// (c) 2019 Dapper Labs - ALL RIGHTS RESERVED

package badger

import (
	"time"

	"github.com/dgraph-io/badger/v2"
	"github.com/rs/zerolog"

	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/component"
	"github.com/onflow/flow-go/module/irrecoverable"
	"github.com/onflow/flow-go/utils/rand"
)

// Cleaner uses component.ComponentManager to implement module.Startable and module.ReadyDoneAware
// to run an internal goroutine which runs badger value log garbage collection at a semi-regular interval.
// The Cleaner exists for 2 reasons:
//   - Run GC frequently enough that each GC is relatively inexpensive
//   - Avoid GC being synchronized across all nodes. Since in the happy path, all nodes have very similar
//     database load patterns, without intervention they are likely to schedule GC at the same time, which
//     can cause temporary consensus halts.
type Cleaner struct {
	component.Component
	log      zerolog.Logger        // logger used for all GC messages of this cleaner
	db       *badger.DB            // database whose value log is garbage collected
	metrics  module.CleanerMetrics // notified (RanGC) with the duration of each successful GC run
	ratio    float64               // ratio argument handed to db.RunValueLogGC; NewCleaner sets it to 0.2
	interval time.Duration         // base wait between GC attempts, before jitter is added
}

// Compile-time check that Cleaner satisfies component.Component.
var _ component.Component = (*Cleaner)(nil)

// NewCleaner returns a cleaner that runs the badger value log garbage collection once every `interval` duration
// (plus a random jitter). If an interval of zero is passed in, we will not run the GC at all.
37 func NewCleaner(log zerolog.Logger, db *badger.DB, metrics module.CleanerMetrics, interval time.Duration) *Cleaner { 38 // NOTE: we run garbage collection frequently at points in our business 39 // logic where we are likely to have a small breather in activity; it thus 40 // makes sense to run garbage collection often, with a smaller ratio, rather 41 // than running it rarely and having big rewrites at once 42 c := &Cleaner{ 43 log: log.With().Str("component", "cleaner").Logger(), 44 db: db, 45 metrics: metrics, 46 ratio: 0.2, 47 interval: interval, 48 } 49 50 // Disable if passed in 0 as interval 51 if c.interval == 0 { 52 c.Component = &module.NoopComponent{} 53 return c 54 } 55 56 c.Component = component.NewComponentManagerBuilder(). 57 AddWorker(c.gcWorkerRoutine). 58 Build() 59 60 return c 61 } 62 63 // gcWorkerRoutine runs badger GC on timely basis. 64 func (c *Cleaner) gcWorkerRoutine(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 65 ready() 66 ticker := time.NewTicker(c.nextWaitDuration()) 67 defer ticker.Stop() 68 for { 69 select { 70 case <-ctx.Done(): 71 return 72 case <-ticker.C: 73 c.runGC() 74 75 // reset the ticker with a new interval and random jitter 76 ticker.Reset(c.nextWaitDuration()) 77 } 78 } 79 } 80 81 // nextWaitDuration calculates next duration for Cleaner to wait before attempting to run GC. 82 // We add 20% jitter into the interval, so that we don't risk nodes syncing their GC calls over time. 83 // Therefore GC is run every X seconds, where X is uniformly sampled from [interval, interval*1.2] 84 func (c *Cleaner) nextWaitDuration() time.Duration { 85 jitter, err := rand.Uint64n(uint64(c.interval.Nanoseconds() / 5)) 86 if err != nil { 87 // if randomness fails, do not use a jitter for this instance. 88 // TODO: address the error properly and not swallow it. 89 // In this specific case, `utils/rand` only errors if the system randomness fails 90 // which is a symptom of a wider failure. 
Many other node components would catch such 91 // a failure. 92 c.log.Warn().Msg("jitter is zero beacuse system randomness has failed") 93 jitter = 0 94 } 95 return time.Duration(c.interval.Nanoseconds() + int64(jitter)) 96 } 97 98 // runGC runs garbage collection for badger DB, handles sentinel errors and reports metrics. 99 func (c *Cleaner) runGC() { 100 started := time.Now() 101 err := c.db.RunValueLogGC(c.ratio) 102 if err == badger.ErrRejected { 103 // NOTE: this happens when a GC call is already running 104 c.log.Warn().Msg("garbage collection on value log already running") 105 return 106 } 107 if err == badger.ErrNoRewrite { 108 // NOTE: this happens when no files have any garbage to drop 109 c.log.Debug().Msg("garbage collection on value log unnecessary") 110 return 111 } 112 if err != nil { 113 c.log.Error().Err(err).Msg("garbage collection on value log failed") 114 return 115 } 116 117 runtime := time.Since(started) 118 c.log.Debug(). 119 Dur("gc_duration", runtime). 120 Msg("garbage collection on value log executed") 121 c.metrics.RanGC(runtime) 122 }