github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/replay_controller.go

package ingester

import (
	"sync"

	"github.com/dustin/go-humanize"
	"github.com/go-kit/log/level"
	"go.uber.org/atomic"

	util_log "github.com/grafana/loki/pkg/util/log"
)

type replayFlusher struct {
	i *Ingester
}

func (f *replayFlusher) Flush() {
	f.i.InitFlushQueues()
	f.i.flush(false) // flush data but don't remove streams from the ingesters

	// Similar to sweepUsers, with the exception that it will not remove streams
	// afterwards, to prevent unlinking a stream which may receive later writes from the WAL.
	// We have to do this here after the flushQueues have been drained.
	instances := f.i.getInstances()

	for _, instance := range instances {
		_ = instance.streams.ForEach(func(s *stream) (bool, error) {
			f.i.removeFlushedChunks(instance, s, false)
			return true, nil
		})
	}
}

type Flusher interface {
	Flush()
}

// replayController coordinates backpressure between WAL replays and chunk flushing.
type replayController struct {
	// Note: this has to be defined first to make sure it is properly aligned on 32-bit platforms.
	// From https://golang.org/pkg/sync/atomic/#pkg-note-BUG:
	// > On ARM, 386, and 32-bit MIPS, it is the caller's responsibility to arrange for
	// > 64-bit alignment of 64-bit words accessed atomically. The first word in a
	// > variable or in an allocated struct, array, or slice can be relied upon to
	// > be 64-bit aligned.
	currentBytes atomic.Int64
	cfg          WALConfig
	metrics      *ingesterMetrics
	cond         *sync.Cond
	isFlushing   atomic.Bool
	flusher      Flusher
}

// The flusher is expected to reduce pressure by calling Sub.
func newReplayController(metrics *ingesterMetrics, cfg WALConfig, flusher Flusher) *replayController {
	return &replayController{
		cfg:     cfg,
		metrics: metrics,
		cond:    sync.NewCond(&sync.Mutex{}),
		flusher: flusher,
	}
}

func (c *replayController) Add(x int64) {
	c.metrics.recoveredBytesTotal.Add(float64(x))
	c.metrics.setRecoveryBytesInUse(c.currentBytes.Add(x))
}

func (c *replayController) Sub(x int64) {
	c.metrics.setRecoveryBytesInUse(c.currentBytes.Sub(x))
}

func (c *replayController) Cur() int {
	return int(c.currentBytes.Load())
}

func (c *replayController) Flush() {
	if c.isFlushing.CAS(false, true) {
		c.metrics.recoveryIsFlushing.Set(1)
		prior := c.currentBytes.Load()
		level.Debug(util_log.Logger).Log(
			"msg", "replay flusher pre-flush",
			"bytes", humanize.Bytes(uint64(prior)),
		)

		c.flusher.Flush()

		after := c.currentBytes.Load()
		level.Debug(util_log.Logger).Log(
			"msg", "replay flusher post-flush",
			"bytes", humanize.Bytes(uint64(after)),
		)

		c.isFlushing.Store(false)
		c.metrics.recoveryIsFlushing.Set(0)

		// Broadcast after the lock is acquired to prevent a race with CPU scheduling
		// where the flush code could finish before the goroutine which initiated it
		// gets to call c.cond.Wait().
		c.cond.L.Lock()
		c.cond.Broadcast()
		c.cond.L.Unlock()
	}
}
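// Illustrative sketch (not part of the original file): the accounting contract
// between replayController and its Flusher. Per the comment on
// newReplayController, the flusher is expected to reduce pressure by calling
// Sub as data leaves memory, which is what lets goroutines waiting in
// WithBackPressure make progress. exampleFlusher, pendingChunks, sizeInBytes,
// and flushChunk below are hypothetical names, not real Loki APIs.
//
//	type exampleFlusher struct {
//		rc *replayController
//	}
//
//	func (f *exampleFlusher) Flush() {
//		for _, c := range f.pendingChunks() {
//			n := c.sizeInBytes() // hypothetical: bytes this chunk holds in memory
//			f.flushChunk(c)      // hypothetical: write the chunk to storage
//			f.rc.Sub(n)          // release backpressure so replay can continue
//		}
//	}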
// WithBackPressure is expected to call replayController.Add in the passed function
// to increase the managed byte count. It will call the function as long as there is
// expected room before the memory cap, and will flush data intermittently when needed.
func (c *replayController) WithBackPressure(fn func() error) error {
	// Account for backpressure and wait until there's enough memory to continue replaying the WAL.
	c.cond.L.Lock()

	// Use 90% as the threshold since we'll be adding to it.
	for c.Cur() > int(c.cfg.ReplayMemoryCeiling)*9/10 {
		// Too much backpressure; flush.
		go c.Flush()
		c.cond.Wait()
	}

	// Don't hold the lock while executing the provided function.
	// This ensures we can run functions concurrently.
	c.cond.L.Unlock()

	return fn()
}
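// Illustrative usage sketch (not part of the original file): how a WAL recovery
// loop might drive WithBackPressure. Per the doc comment above, Add is called
// inside the passed function so the controller can account for the bytes being
// replayed. The names entries, entry.Line, and applyToMemchunk are hypothetical
// stand-ins for the real recovery code.
//
//	rc := newReplayController(metrics, cfg, &replayFlusher{i: ing})
//	for _, entry := range entries {
//		entry := entry
//		if err := rc.WithBackPressure(func() error {
//			rc.Add(int64(len(entry.Line))) // grow the managed byte count first
//			return applyToMemchunk(entry)  // hypothetical: replay into in-memory chunks
//		}); err != nil {
//			return err
//		}
//	}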