// Source: github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/chunk/fetcher/fetcher.go

// Package fetcher fetches chunk contents from the chunk cache and the backing
// store, and writes cache misses back to the cache.
package fetcher

import (
	"context"
	"errors"
	"sync"

	"github.com/go-kit/log/level"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/prometheus/promql"

	"github.com/grafana/loki/pkg/logqlmodel/stats"
	"github.com/grafana/loki/pkg/storage/chunk"
	"github.com/grafana/loki/pkg/storage/chunk/cache"
	"github.com/grafana/loki/pkg/storage/chunk/client"
	"github.com/grafana/loki/pkg/storage/config"
	util_log "github.com/grafana/loki/pkg/util/log"
	"github.com/grafana/loki/pkg/util/spanlogger"
)

var (
	// errAsyncBufferFull is returned by writeBackCacheAsync when the
	// non-blocking send to the async write-back queue cannot proceed.
	errAsyncBufferFull = errors.New("the async buffer is full")
	// skipped is incremented by FetchChunks each time a write-back is dropped
	// because of errAsyncBufferFull.
	skipped = promauto.NewCounter(prometheus.CounterOpts{
		Name: "loki_chunk_fetcher_cache_skipped_buffer_full_total",
		Help: "Total number of operations against cache that have been skipped.",
	})
	// chunkFetcherCacheQueueEnqueue grows by the batch length whenever a batch
	// of chunks is accepted by the async write-back queue.
	chunkFetcherCacheQueueEnqueue = promauto.NewCounter(prometheus.CounterOpts{
		Name: "loki_chunk_fetcher_cache_enqueued_total",
		Help: "Total number of chunks enqueued to a buffer to be asynchronously written back to the chunk cache.",
	})
	// chunkFetcherCacheQueueDequeue grows by the batch length whenever a batch
	// is taken off the async write-back queue for writing to the cache.
	chunkFetcherCacheQueueDequeue = promauto.NewCounter(prometheus.CounterOpts{
		Name: "loki_chunk_fetcher_cache_dequeued_total",
		Help: "Total number of chunks asynchronously dequeued from a buffer and written back to the chunk cache.",
	})
	// cacheCorrupt is incremented by a decode worker each time decoding a
	// cached chunk returns an error.
	cacheCorrupt = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki",
		Name:      "cache_corrupt_chunks_total",
		Help:      "Total count of corrupt chunks found in cache.",
	})
)

// chunkDecodeParallelism is the number of decode worker goroutines New starts.
const chunkDecodeParallelism = 16

// Fetcher deals with fetching chunk contents from the cache/store,
// and writing back any misses to the cache. Also responsible for decoding
// chunks from the cache, in parallel.
48 type Fetcher struct { 49 schema config.SchemaConfig 50 storage client.Client 51 cache cache.Cache 52 cacheStubs bool 53 54 wait sync.WaitGroup 55 decodeRequests chan decodeRequest 56 57 maxAsyncConcurrency int 58 maxAsyncBufferSize int 59 60 asyncQueue chan []chunk.Chunk 61 stopOnce sync.Once 62 stop chan struct{} 63 } 64 65 type decodeRequest struct { 66 chunk chunk.Chunk 67 buf []byte 68 responses chan decodeResponse 69 } 70 71 type decodeResponse struct { 72 chunk chunk.Chunk 73 err error 74 } 75 76 // New makes a new ChunkFetcher. 77 func New(cacher cache.Cache, cacheStubs bool, schema config.SchemaConfig, storage client.Client, maxAsyncConcurrency int, maxAsyncBufferSize int) (*Fetcher, error) { 78 c := &Fetcher{ 79 schema: schema, 80 storage: storage, 81 cache: cacher, 82 cacheStubs: cacheStubs, 83 decodeRequests: make(chan decodeRequest), 84 maxAsyncConcurrency: maxAsyncConcurrency, 85 maxAsyncBufferSize: maxAsyncBufferSize, 86 stop: make(chan struct{}), 87 } 88 89 c.wait.Add(chunkDecodeParallelism) 90 for i := 0; i < chunkDecodeParallelism; i++ { 91 go c.worker() 92 } 93 94 // Start a number of goroutines - processing async operations - equal 95 // to the max concurrency we have. 
96 c.asyncQueue = make(chan []chunk.Chunk, c.maxAsyncBufferSize) 97 for i := 0; i < c.maxAsyncConcurrency; i++ { 98 go c.asyncWriteBackCacheQueueProcessLoop() 99 } 100 101 return c, nil 102 } 103 104 func (c *Fetcher) writeBackCacheAsync(fromStorage []chunk.Chunk) error { 105 select { 106 case c.asyncQueue <- fromStorage: 107 chunkFetcherCacheQueueEnqueue.Add(float64(len(fromStorage))) 108 return nil 109 default: 110 return errAsyncBufferFull 111 } 112 } 113 114 func (c *Fetcher) asyncWriteBackCacheQueueProcessLoop() { 115 for { 116 select { 117 case fromStorage := <-c.asyncQueue: 118 chunkFetcherCacheQueueDequeue.Add(float64(len(fromStorage))) 119 cacheErr := c.WriteBackCache(context.Background(), fromStorage) 120 if cacheErr != nil { 121 level.Warn(util_log.Logger).Log("msg", "could not write fetched chunks from storage into chunk cache", "err", cacheErr) 122 } 123 case <-c.stop: 124 return 125 } 126 } 127 } 128 129 // Stop the ChunkFetcher. 130 func (c *Fetcher) Stop() { 131 c.stopOnce.Do(func() { 132 close(c.decodeRequests) 133 c.wait.Wait() 134 c.cache.Stop() 135 close(c.stop) 136 }) 137 } 138 139 func (c *Fetcher) worker() { 140 defer c.wait.Done() 141 decodeContext := chunk.NewDecodeContext() 142 for req := range c.decodeRequests { 143 err := req.chunk.Decode(decodeContext, req.buf) 144 if err != nil { 145 cacheCorrupt.Inc() 146 } 147 req.responses <- decodeResponse{ 148 chunk: req.chunk, 149 err: err, 150 } 151 } 152 } 153 154 func (c *Fetcher) Cache() cache.Cache { 155 return c.cache 156 } 157 158 func (c *Fetcher) Client() client.Client { 159 return c.storage 160 } 161 162 // FetchChunks fetches a set of chunks from cache and store. Note that the keys passed in must be 163 // lexicographically sorted, while the returned chunks are not in the same order as the passed in chunks. 
164 func (c *Fetcher) FetchChunks(ctx context.Context, chunks []chunk.Chunk, keys []string) ([]chunk.Chunk, error) { 165 if ctx.Err() != nil { 166 return nil, ctx.Err() 167 } 168 log, ctx := spanlogger.New(ctx, "ChunkStore.FetchChunks") 169 defer log.Span.Finish() 170 171 // Now fetch the actual chunk data from Memcache / S3 172 cacheHits, cacheBufs, _, err := c.cache.Fetch(ctx, keys) 173 if err != nil { 174 level.Warn(log).Log("msg", "error fetching from cache", "err", err) 175 } 176 fromCache, missing, err := c.processCacheResponse(ctx, chunks, cacheHits, cacheBufs) 177 if err != nil { 178 level.Warn(log).Log("msg", "error process response from cache", "err", err) 179 } 180 181 var fromStorage []chunk.Chunk 182 if len(missing) > 0 { 183 fromStorage, err = c.storage.GetChunks(ctx, missing) 184 } 185 186 // normally these stats would be collected by the cache.statsCollector wrapper, but chunks are written back 187 // to the cache asynchronously in the background and we lose the context 188 var bytes int 189 for _, c := range fromStorage { 190 bytes += c.Size() 191 } 192 193 st := stats.FromContext(ctx) 194 st.AddCacheEntriesStored(stats.ChunkCache, len(fromStorage)) 195 st.AddCacheBytesSent(stats.ChunkCache, bytes) 196 197 // Always cache any chunks we did get 198 if cacheErr := c.writeBackCacheAsync(fromStorage); cacheErr != nil { 199 if cacheErr == errAsyncBufferFull { 200 skipped.Inc() 201 } 202 level.Warn(log).Log("msg", "could not store chunks in chunk cache", "err", cacheErr) 203 } 204 205 if err != nil { 206 // Don't rely on Cortex error translation here. 207 return nil, promql.ErrStorage{Err: err} 208 } 209 210 allChunks := append(fromCache, fromStorage...) 
211 return allChunks, nil 212 } 213 214 func (c *Fetcher) WriteBackCache(ctx context.Context, chunks []chunk.Chunk) error { 215 keys := make([]string, 0, len(chunks)) 216 bufs := make([][]byte, 0, len(chunks)) 217 for i := range chunks { 218 var encoded []byte 219 var err error 220 if !c.cacheStubs { 221 encoded, err = chunks[i].Encoded() 222 // TODO don't fail, just log and continue? 223 if err != nil { 224 return err 225 } 226 } 227 228 keys = append(keys, c.schema.ExternalKey(chunks[i].ChunkRef)) 229 bufs = append(bufs, encoded) 230 } 231 232 err := c.cache.Store(ctx, keys, bufs) 233 if err != nil { 234 level.Warn(util_log.Logger).Log("msg", "writeBackCache cache store fail", "err", err) 235 } 236 return nil 237 } 238 239 // ProcessCacheResponse decodes the chunks coming back from the cache, separating 240 // hits and misses. 241 func (c *Fetcher) processCacheResponse(ctx context.Context, chunks []chunk.Chunk, keys []string, bufs [][]byte) ([]chunk.Chunk, []chunk.Chunk, error) { 242 var ( 243 requests = make([]decodeRequest, 0, len(keys)) 244 responses = make(chan decodeResponse) 245 missing []chunk.Chunk 246 logger = util_log.WithContext(ctx, util_log.Logger) 247 ) 248 249 i, j := 0, 0 250 for i < len(chunks) && j < len(keys) { 251 chunkKey := c.schema.ExternalKey(chunks[i].ChunkRef) 252 253 if chunkKey < keys[j] { 254 missing = append(missing, chunks[i]) 255 i++ 256 } else if chunkKey > keys[j] { 257 level.Warn(logger).Log("msg", "got chunk from cache we didn't ask for") 258 j++ 259 } else { 260 requests = append(requests, decodeRequest{ 261 chunk: chunks[i], 262 buf: bufs[j], 263 responses: responses, 264 }) 265 i++ 266 j++ 267 } 268 } 269 for ; i < len(chunks); i++ { 270 missing = append(missing, chunks[i]) 271 } 272 level.Debug(logger).Log("chunks", len(chunks), "decodeRequests", len(requests), "missing", len(missing)) 273 274 go func() { 275 for _, request := range requests { 276 c.decodeRequests <- request 277 } 278 }() 279 280 var ( 281 err error 282 
found []chunk.Chunk 283 ) 284 for i := 0; i < len(requests); i++ { 285 response := <-responses 286 287 // Don't exit early, as we don't want to block the workers. 288 if response.err != nil { 289 err = response.err 290 } else { 291 found = append(found, response.chunk) 292 } 293 } 294 return found, missing, err 295 } 296 297 func (c *Fetcher) IsChunkNotFoundErr(err error) bool { 298 return c.storage.IsChunkNotFoundErr(err) 299 }