github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/storage/badger/cleaner.go (about) 1 package badger 2 3 import ( 4 "time" 5 6 "github.com/dgraph-io/badger/v2" 7 "github.com/rs/zerolog" 8 9 "github.com/onflow/flow-go/module" 10 "github.com/onflow/flow-go/module/component" 11 "github.com/onflow/flow-go/module/irrecoverable" 12 "github.com/onflow/flow-go/utils/rand" 13 ) 14 15 // Cleaner uses component.ComponentManager to implement module.Startable and module.ReadyDoneAware 16 // to run an internal goroutine which run badger value log garbage collection at a semi-regular interval. 17 // The Cleaner exists for 2 reasons: 18 // - Run GC frequently enough that each GC is relatively inexpensive 19 // - Avoid GC being synchronized across all nodes. Since in the happy path, all nodes have very similar 20 // database load patterns, without intervention they are likely to schedule GC at the same time, which 21 // can cause temporary consensus halts. 22 type Cleaner struct { 23 component.Component 24 log zerolog.Logger 25 db *badger.DB 26 metrics module.CleanerMetrics 27 ratio float64 28 interval time.Duration 29 } 30 31 var _ component.Component = (*Cleaner)(nil) 32 33 // NewCleaner returns a cleaner that runs the badger value log garbage collection once every `interval` duration 34 // if an interval of zero is passed in, we will not run the GC at all. 35 func NewCleaner(log zerolog.Logger, db *badger.DB, metrics module.CleanerMetrics, interval time.Duration) *Cleaner { 36 // NOTE: we run garbage collection frequently at points in our business 37 // logic where we are likely to have a small breather in activity; it thus 38 // makes sense to run garbage collection often, with a smaller ratio, rather 39 // than running it rarely and having big rewrites at once 40 c := &Cleaner{ 41 log: log.With().Str("component", "cleaner").Logger(), 42 db: db, 43 metrics: metrics, 44 ratio: 0.2, 45 interval: interval, 46 } 47 48 // Disable if passed in 0 as interval 49 if c.interval == 0 { 50 c.Component = &module.NoopComponent{} 51 return c 52 } 53 54 c.Component = component.NewComponentManagerBuilder(). 55 AddWorker(c.gcWorkerRoutine). 56 Build() 57 58 return c 59 } 60 61 // gcWorkerRoutine runs badger GC on timely basis. 62 func (c *Cleaner) gcWorkerRoutine(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 63 ready() 64 ticker := time.NewTicker(c.nextWaitDuration()) 65 defer ticker.Stop() 66 for { 67 select { 68 case <-ctx.Done(): 69 return 70 case <-ticker.C: 71 c.runGC() 72 73 // reset the ticker with a new interval and random jitter 74 ticker.Reset(c.nextWaitDuration()) 75 } 76 } 77 } 78 79 // nextWaitDuration calculates next duration for Cleaner to wait before attempting to run GC. 80 // We add 20% jitter into the interval, so that we don't risk nodes syncing their GC calls over time. 81 // Therefore GC is run every X seconds, where X is uniformly sampled from [interval, interval*1.2] 82 func (c *Cleaner) nextWaitDuration() time.Duration { 83 jitter, err := rand.Uint64n(uint64(c.interval.Nanoseconds() / 5)) 84 if err != nil { 85 // if randomness fails, do not use a jitter for this instance. 86 // TODO: address the error properly and not swallow it. 87 // In this specific case, `utils/rand` only errors if the system randomness fails 88 // which is a symptom of a wider failure. Many other node components would catch such 89 // a failure. 90 c.log.Warn().Msg("jitter is zero beacuse system randomness has failed") 91 jitter = 0 92 } 93 return time.Duration(c.interval.Nanoseconds() + int64(jitter)) 94 } 95 96 // runGC runs garbage collection for badger DB, handles sentinel errors and reports metrics. 97 func (c *Cleaner) runGC() { 98 started := time.Now() 99 err := c.db.RunValueLogGC(c.ratio) 100 if err == badger.ErrRejected { 101 // NOTE: this happens when a GC call is already running 102 c.log.Warn().Msg("garbage collection on value log already running") 103 return 104 } 105 if err == badger.ErrNoRewrite { 106 // NOTE: this happens when no files have any garbage to drop 107 c.log.Debug().Msg("garbage collection on value log unnecessary") 108 return 109 } 110 if err != nil { 111 c.log.Error().Err(err).Msg("garbage collection on value log failed") 112 return 113 } 114 115 runtime := time.Since(started) 116 c.log.Debug(). 117 Dur("gc_duration", runtime). 118 Msg("garbage collection on value log executed") 119 c.metrics.RanGC(runtime) 120 }