github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/ingester/replay_controller.go (about)

     1  package ingester
     2  
     3  import (
     4  	"sync"
     5  
     6  	"github.com/dustin/go-humanize"
     7  	"github.com/go-kit/log/level"
     8  	"go.uber.org/atomic"
     9  
    10  	util_log "github.com/grafana/loki/pkg/util/log"
    11  )
    12  
// replayFlusher is the Flusher implementation used during WAL replay: it
// flushes the wrapped Ingester's in-memory chunks while deliberately keeping
// every stream registered (streams may still receive later WAL writes).
type replayFlusher struct {
	i *Ingester
}
    16  
    17  func (f *replayFlusher) Flush() {
    18  	f.i.InitFlushQueues()
    19  	f.i.flush(false) // flush data but don't remove streams from the ingesters
    20  
    21  	// Similar to sweepUsers with the exception that it will not remove streams
    22  	// afterwards to prevent unlinking a stream which may receive later writes from the WAL.
    23  	// We have to do this here after the flushQueues have been drained.
    24  	instances := f.i.getInstances()
    25  
    26  	for _, instance := range instances {
    27  
    28  		_ = instance.streams.ForEach(func(s *stream) (bool, error) {
    29  			f.i.removeFlushedChunks(instance, s, false)
    30  			return true, nil
    31  		})
    32  
    33  	}
    34  
    35  }
    36  
// Flusher abstracts the flush operation so the replayController can trigger
// chunk flushing (e.g. via replayFlusher) without depending on the Ingester
// type directly.
type Flusher interface {
	Flush()
}
    40  
// replayController handles coordinating backpressure between WAL replays and chunk flushing.
type replayController struct {
	// Note, this has to be defined first to make sure it is aligned properly for 32bit ARM OS
	// From https://golang.org/pkg/sync/atomic/#pkg-note-BUG:
	// > On ARM, 386, and 32-bit MIPS, it is the caller's responsibility to arrange for
	// > 64-bit alignment of 64-bit words accessed atomically. The first word in a
	// > variable or in an allocated struct, array, or slice can be relied upon to
	// > be 64-bit aligned.
	currentBytes atomic.Int64     // bytes of replayed data currently held in memory (Add/Sub adjust it)
	cfg          WALConfig        // supplies ReplayMemoryCeiling, the backpressure threshold
	metrics      *ingesterMetrics // recovery gauges/counters updated alongside currentBytes
	cond         *sync.Cond       // wakes WithBackPressure waiters once a flush completes
	isFlushing   atomic.Bool      // CAS guard ensuring only one Flush runs at a time
	flusher      Flusher          // invoked to relieve pressure; expected to call Sub
}
    56  
    57  // flusher is expected to reduce pressure via calling Sub
    58  func newReplayController(metrics *ingesterMetrics, cfg WALConfig, flusher Flusher) *replayController {
    59  	return &replayController{
    60  		cfg:     cfg,
    61  		metrics: metrics,
    62  		cond:    sync.NewCond(&sync.Mutex{}),
    63  		flusher: flusher,
    64  	}
    65  }
    66  
    67  func (c *replayController) Add(x int64) {
    68  	c.metrics.recoveredBytesTotal.Add(float64(x))
    69  	c.metrics.setRecoveryBytesInUse(c.currentBytes.Add(x))
    70  }
    71  
    72  func (c *replayController) Sub(x int64) {
    73  	c.metrics.setRecoveryBytesInUse(c.currentBytes.Sub(x))
    74  
    75  }
    76  
    77  func (c *replayController) Cur() int {
    78  	return int(c.currentBytes.Load())
    79  }
    80  
    81  func (c *replayController) Flush() {
    82  	if c.isFlushing.CAS(false, true) {
    83  		c.metrics.recoveryIsFlushing.Set(1)
    84  		prior := c.currentBytes.Load()
    85  		level.Debug(util_log.Logger).Log(
    86  			"msg", "replay flusher pre-flush",
    87  			"bytes", humanize.Bytes(uint64(prior)),
    88  		)
    89  
    90  		c.flusher.Flush()
    91  
    92  		after := c.currentBytes.Load()
    93  		level.Debug(util_log.Logger).Log(
    94  			"msg", "replay flusher post-flush",
    95  			"bytes", humanize.Bytes(uint64(after)),
    96  		)
    97  
    98  		c.isFlushing.Store(false)
    99  		c.metrics.recoveryIsFlushing.Set(0)
   100  
   101  		// Broadcast after lock is acquired to prevent race conditions with cpu scheduling
   102  		// where the flush code could finish before the goroutine which initiated it gets to call
   103  		// c.cond.Wait()
   104  		c.cond.L.Lock()
   105  		c.cond.Broadcast()
   106  		c.cond.L.Unlock()
   107  	}
   108  }
   109  
   110  // WithBackPressure is expected to call replayController.Add in the passed function to increase the managed byte count.
   111  // It will call the function as long as there is expected room before the memory cap and will then flush data intermittently
   112  // when needed.
   113  func (c *replayController) WithBackPressure(fn func() error) error {
   114  	// Account for backpressure and wait until there's enough memory to continue replaying the WAL
   115  	c.cond.L.Lock()
   116  
   117  	// use 90% as a threshold since we'll be adding to it.
   118  	for c.Cur() > int(c.cfg.ReplayMemoryCeiling)*9/10 {
   119  		// too much backpressure, flush
   120  		go c.Flush()
   121  		c.cond.Wait()
   122  	}
   123  
   124  	// Don't hold the lock while executing the provided function.
   125  	// This ensures we can run functions concurrently.
   126  	c.cond.L.Unlock()
   127  
   128  	return fn()
   129  }