github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/chunk/fetcher/fetcher.go

github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/chunk/fetcher/fetcher.go (about)

     1  package fetcher
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"sync"
     7  
     8  	"github.com/go-kit/log/level"
     9  	"github.com/prometheus/client_golang/prometheus"
    10  	"github.com/prometheus/client_golang/prometheus/promauto"
    11  	"github.com/prometheus/prometheus/promql"
    12  
    13  	"github.com/grafana/loki/pkg/logqlmodel/stats"
    14  	"github.com/grafana/loki/pkg/storage/chunk"
    15  	"github.com/grafana/loki/pkg/storage/chunk/cache"
    16  	"github.com/grafana/loki/pkg/storage/chunk/client"
    17  	"github.com/grafana/loki/pkg/storage/config"
    18  	util_log "github.com/grafana/loki/pkg/util/log"
    19  	"github.com/grafana/loki/pkg/util/spanlogger"
    20  )
    21  
    22  var (
    23  	errAsyncBufferFull = errors.New("the async buffer is full")
    24  	skipped            = promauto.NewCounter(prometheus.CounterOpts{
    25  		Name: "loki_chunk_fetcher_cache_skipped_buffer_full_total",
    26  		Help: "Total number of operations against cache that have been skipped.",
    27  	})
    28  	chunkFetcherCacheQueueEnqueue = promauto.NewCounter(prometheus.CounterOpts{
    29  		Name: "loki_chunk_fetcher_cache_enqueued_total",
    30  		Help: "Total number of chunks enqueued to a buffer to be asynchronously written back to the chunk cache.",
    31  	})
    32  	chunkFetcherCacheQueueDequeue = promauto.NewCounter(prometheus.CounterOpts{
    33  		Name: "loki_chunk_fetcher_cache_dequeued_total",
    34  		Help: "Total number of chunks asynchronously dequeued from a buffer and written back to the chunk cache.",
    35  	})
    36  	cacheCorrupt = promauto.NewCounter(prometheus.CounterOpts{
    37  		Namespace: "loki",
    38  		Name:      "cache_corrupt_chunks_total",
    39  		Help:      "Total count of corrupt chunks found in cache.",
    40  	})
    41  )
    42  
    43  const chunkDecodeParallelism = 16
    44  
    45  // Fetcher deals with fetching chunk contents from the cache/store,
    46  // and writing back any misses to the cache.  Also responsible for decoding
    47  // chunks from the cache, in parallel.
    48  type Fetcher struct {
    49  	schema     config.SchemaConfig
    50  	storage    client.Client
    51  	cache      cache.Cache
    52  	cacheStubs bool
    53  
    54  	wait           sync.WaitGroup
    55  	decodeRequests chan decodeRequest
    56  
    57  	maxAsyncConcurrency int
    58  	maxAsyncBufferSize  int
    59  
    60  	asyncQueue chan []chunk.Chunk
    61  	stopOnce   sync.Once
    62  	stop       chan struct{}
    63  }
    64  
    65  type decodeRequest struct {
    66  	chunk     chunk.Chunk
    67  	buf       []byte
    68  	responses chan decodeResponse
    69  }
    70  
    71  type decodeResponse struct {
    72  	chunk chunk.Chunk
    73  	err   error
    74  }
    75  
    76  // New makes a new ChunkFetcher.
    77  func New(cacher cache.Cache, cacheStubs bool, schema config.SchemaConfig, storage client.Client, maxAsyncConcurrency int, maxAsyncBufferSize int) (*Fetcher, error) {
    78  	c := &Fetcher{
    79  		schema:              schema,
    80  		storage:             storage,
    81  		cache:               cacher,
    82  		cacheStubs:          cacheStubs,
    83  		decodeRequests:      make(chan decodeRequest),
    84  		maxAsyncConcurrency: maxAsyncConcurrency,
    85  		maxAsyncBufferSize:  maxAsyncBufferSize,
    86  		stop:                make(chan struct{}),
    87  	}
    88  
    89  	c.wait.Add(chunkDecodeParallelism)
    90  	for i := 0; i < chunkDecodeParallelism; i++ {
    91  		go c.worker()
    92  	}
    93  
    94  	// Start a number of goroutines - processing async operations - equal
    95  	// to the max concurrency we have.
    96  	c.asyncQueue = make(chan []chunk.Chunk, c.maxAsyncBufferSize)
    97  	for i := 0; i < c.maxAsyncConcurrency; i++ {
    98  		go c.asyncWriteBackCacheQueueProcessLoop()
    99  	}
   100  
   101  	return c, nil
   102  }
   103  
   104  func (c *Fetcher) writeBackCacheAsync(fromStorage []chunk.Chunk) error {
   105  	select {
   106  	case c.asyncQueue <- fromStorage:
   107  		chunkFetcherCacheQueueEnqueue.Add(float64(len(fromStorage)))
   108  		return nil
   109  	default:
   110  		return errAsyncBufferFull
   111  	}
   112  }
   113  
   114  func (c *Fetcher) asyncWriteBackCacheQueueProcessLoop() {
   115  	for {
   116  		select {
   117  		case fromStorage := <-c.asyncQueue:
   118  			chunkFetcherCacheQueueDequeue.Add(float64(len(fromStorage)))
   119  			cacheErr := c.WriteBackCache(context.Background(), fromStorage)
   120  			if cacheErr != nil {
   121  				level.Warn(util_log.Logger).Log("msg", "could not write fetched chunks from storage into chunk cache", "err", cacheErr)
   122  			}
   123  		case <-c.stop:
   124  			return
   125  		}
   126  	}
   127  }
   128  
   129  // Stop the ChunkFetcher.
   130  func (c *Fetcher) Stop() {
   131  	c.stopOnce.Do(func() {
   132  		close(c.decodeRequests)
   133  		c.wait.Wait()
   134  		c.cache.Stop()
   135  		close(c.stop)
   136  	})
   137  }
   138  
   139  func (c *Fetcher) worker() {
   140  	defer c.wait.Done()
   141  	decodeContext := chunk.NewDecodeContext()
   142  	for req := range c.decodeRequests {
   143  		err := req.chunk.Decode(decodeContext, req.buf)
   144  		if err != nil {
   145  			cacheCorrupt.Inc()
   146  		}
   147  		req.responses <- decodeResponse{
   148  			chunk: req.chunk,
   149  			err:   err,
   150  		}
   151  	}
   152  }
   153  
   154  func (c *Fetcher) Cache() cache.Cache {
   155  	return c.cache
   156  }
   157  
   158  func (c *Fetcher) Client() client.Client {
   159  	return c.storage
   160  }
   161  
   162  // FetchChunks fetches a set of chunks from cache and store. Note that the keys passed in must be
   163  // lexicographically sorted, while the returned chunks are not in the same order as the passed in chunks.
   164  func (c *Fetcher) FetchChunks(ctx context.Context, chunks []chunk.Chunk, keys []string) ([]chunk.Chunk, error) {
   165  	if ctx.Err() != nil {
   166  		return nil, ctx.Err()
   167  	}
   168  	log, ctx := spanlogger.New(ctx, "ChunkStore.FetchChunks")
   169  	defer log.Span.Finish()
   170  
   171  	// Now fetch the actual chunk data from Memcache / S3
   172  	cacheHits, cacheBufs, _, err := c.cache.Fetch(ctx, keys)
   173  	if err != nil {
   174  		level.Warn(log).Log("msg", "error fetching from cache", "err", err)
   175  	}
   176  	fromCache, missing, err := c.processCacheResponse(ctx, chunks, cacheHits, cacheBufs)
   177  	if err != nil {
   178  		level.Warn(log).Log("msg", "error process response from cache", "err", err)
   179  	}
   180  
   181  	var fromStorage []chunk.Chunk
   182  	if len(missing) > 0 {
   183  		fromStorage, err = c.storage.GetChunks(ctx, missing)
   184  	}
   185  
   186  	// normally these stats would be collected by the cache.statsCollector wrapper, but chunks are written back
   187  	// to the cache asynchronously in the background and we lose the context
   188  	var bytes int
   189  	for _, c := range fromStorage {
   190  		bytes += c.Size()
   191  	}
   192  
   193  	st := stats.FromContext(ctx)
   194  	st.AddCacheEntriesStored(stats.ChunkCache, len(fromStorage))
   195  	st.AddCacheBytesSent(stats.ChunkCache, bytes)
   196  
   197  	// Always cache any chunks we did get
   198  	if cacheErr := c.writeBackCacheAsync(fromStorage); cacheErr != nil {
   199  		if cacheErr == errAsyncBufferFull {
   200  			skipped.Inc()
   201  		}
   202  		level.Warn(log).Log("msg", "could not store chunks in chunk cache", "err", cacheErr)
   203  	}
   204  
   205  	if err != nil {
   206  		// Don't rely on Cortex error translation here.
   207  		return nil, promql.ErrStorage{Err: err}
   208  	}
   209  
   210  	allChunks := append(fromCache, fromStorage...)
   211  	return allChunks, nil
   212  }
   213  
   214  func (c *Fetcher) WriteBackCache(ctx context.Context, chunks []chunk.Chunk) error {
   215  	keys := make([]string, 0, len(chunks))
   216  	bufs := make([][]byte, 0, len(chunks))
   217  	for i := range chunks {
   218  		var encoded []byte
   219  		var err error
   220  		if !c.cacheStubs {
   221  			encoded, err = chunks[i].Encoded()
   222  			// TODO don't fail, just log and continue?
   223  			if err != nil {
   224  				return err
   225  			}
   226  		}
   227  
   228  		keys = append(keys, c.schema.ExternalKey(chunks[i].ChunkRef))
   229  		bufs = append(bufs, encoded)
   230  	}
   231  
   232  	err := c.cache.Store(ctx, keys, bufs)
   233  	if err != nil {
   234  		level.Warn(util_log.Logger).Log("msg", "writeBackCache cache store fail", "err", err)
   235  	}
   236  	return nil
   237  }
   238  
   239  // ProcessCacheResponse decodes the chunks coming back from the cache, separating
   240  // hits and misses.
   241  func (c *Fetcher) processCacheResponse(ctx context.Context, chunks []chunk.Chunk, keys []string, bufs [][]byte) ([]chunk.Chunk, []chunk.Chunk, error) {
   242  	var (
   243  		requests  = make([]decodeRequest, 0, len(keys))
   244  		responses = make(chan decodeResponse)
   245  		missing   []chunk.Chunk
   246  		logger    = util_log.WithContext(ctx, util_log.Logger)
   247  	)
   248  
   249  	i, j := 0, 0
   250  	for i < len(chunks) && j < len(keys) {
   251  		chunkKey := c.schema.ExternalKey(chunks[i].ChunkRef)
   252  
   253  		if chunkKey < keys[j] {
   254  			missing = append(missing, chunks[i])
   255  			i++
   256  		} else if chunkKey > keys[j] {
   257  			level.Warn(logger).Log("msg", "got chunk from cache we didn't ask for")
   258  			j++
   259  		} else {
   260  			requests = append(requests, decodeRequest{
   261  				chunk:     chunks[i],
   262  				buf:       bufs[j],
   263  				responses: responses,
   264  			})
   265  			i++
   266  			j++
   267  		}
   268  	}
   269  	for ; i < len(chunks); i++ {
   270  		missing = append(missing, chunks[i])
   271  	}
   272  	level.Debug(logger).Log("chunks", len(chunks), "decodeRequests", len(requests), "missing", len(missing))
   273  
   274  	go func() {
   275  		for _, request := range requests {
   276  			c.decodeRequests <- request
   277  		}
   278  	}()
   279  
   280  	var (
   281  		err   error
   282  		found []chunk.Chunk
   283  	)
   284  	for i := 0; i < len(requests); i++ {
   285  		response := <-responses
   286  
   287  		// Don't exit early, as we don't want to block the workers.
   288  		if response.err != nil {
   289  			err = response.err
   290  		} else {
   291  			found = append(found, response.chunk)
   292  		}
   293  	}
   294  	return found, missing, err
   295  }
   296  
   297  func (c *Fetcher) IsChunkNotFoundErr(err error) bool {
   298  	return c.storage.IsChunkNotFoundErr(err)
   299  }