github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/series/index/caching_index_client.go (about)

     1  package index
     2  
     3  import (
     4  	"context"
     5  	fmt "fmt"
     6  	"sync"
     7  	"time"
     8  	"unsafe"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/go-kit/log/level"
    12  	"github.com/gogo/protobuf/proto"
    13  	"github.com/grafana/dskit/tenant"
    14  	"github.com/prometheus/client_golang/prometheus"
    15  	"github.com/prometheus/client_golang/prometheus/promauto"
    16  
    17  	"github.com/grafana/loki/pkg/storage/chunk/cache"
    18  	util_log "github.com/grafana/loki/pkg/util/log"
    19  )
    20  
// Counters describing index cache activity, created with promauto under the
// "loki" namespace.
var (
	// cacheCorruptErrs is incremented in cacheFetch when a value returned by
	// the cache cannot be unmarshalled into a ReadBatch.
	cacheCorruptErrs = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki",
		Name:      "querier_index_cache_corruptions_total",
		Help:      "The number of cache corruptions for the index cache.",
	})
	// cacheHits is incremented in cacheFetch for every cached batch that
	// decodes, matches its key and has not expired.
	cacheHits = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki",
		Name:      "querier_index_cache_hits_total",
		Help:      "The number of cache hits for the index cache.",
	})
	// cacheGets is increased in cacheFetch by the number of keys requested.
	cacheGets = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki",
		Name:      "querier_index_cache_gets_total",
		Help:      "The number of gets for the index cache.",
	})
	// cachePuts is increased in cacheStore by the number of keys being stored.
	cachePuts = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki",
		Name:      "querier_index_cache_puts_total",
		Help:      "The number of puts for the index cache.",
	})
	// cacheEncodeErrs is incremented in cacheStore when a ReadBatch fails to
	// marshal.
	cacheEncodeErrs = promauto.NewCounter(prometheus.CounterOpts{
		Namespace: "loki",
		Name:      "querier_index_cache_encode_errors_total",
		Help:      "The number of errors for the index cache while encoding the body.",
	})
)
    48  
    49  // CardinalityExceededError is returned when the user reads a row that
    50  // is too large.
    51  type CardinalityExceededError struct {
    52  	MetricName, LabelName string
    53  	Size, Limit           int32
    54  }
    55  
    56  func (e CardinalityExceededError) Error() string {
    57  	return fmt.Sprintf("cardinality limit exceeded for %s{%s}; %d entries, more than limit of %d",
    58  		e.MetricName, e.LabelName, e.Size, e.Limit)
    59  }
    60  
    61  // StoreLimits helps get Limits specific to Queries for Stores
    62  type StoreLimits interface {
    63  	CardinalityLimit(string) int
    64  }
    65  
// sep separates the components of a cache key (table name, hash value and,
// for exact queries, the range-value prefix and value filter).
const sep = "\xff"
    67  
// cachingIndexClient is a Client that serves QueryPages from a cache where it
// can and delegates to the embedded Client otherwise.
//
// Fields:
//   - Client: the wrapped index client; queried on cache misses, and every
//     method not overridden here is promoted from it.
//   - cache: stores marshalled ReadBatch values under hashed query keys.
//   - validity: how long a cached batch for a non-immutable query is treated
//     as fresh.
//   - limits: source of the per-tenant cardinality limit.
//   - logger: base logger used when storing to the cache.
//   - disableBroadQueries: when true, non-chunk queries are cached per exact
//     query instead of per TableName/HashValue.
type cachingIndexClient struct {
	Client
	cache               cache.Cache
	validity            time.Duration
	limits              StoreLimits
	logger              log.Logger
	disableBroadQueries bool
}
    76  
    77  func NewCachingIndexClient(client Client, c cache.Cache, validity time.Duration, limits StoreLimits, logger log.Logger, disableBroadQueries bool) Client {
    78  	if c == nil || cache.IsEmptyTieredCache(c) {
    79  		return client
    80  	}
    81  
    82  	return &cachingIndexClient{
    83  		Client:              client,
    84  		cache:               cache.NewSnappy(c, logger),
    85  		validity:            validity,
    86  		limits:              limits,
    87  		logger:              logger,
    88  		disableBroadQueries: disableBroadQueries,
    89  	}
    90  }
    91  
// Stop stops the cache first and then the wrapped Client.
func (s *cachingIndexClient) Stop() {
	s.cache.Stop()
	s.Client.Stop()
}
    96  
    97  func (s *cachingIndexClient) QueryPages(ctx context.Context, queries []Query, callback QueryPagesCallback) error {
    98  	if len(queries) == 0 {
    99  		return nil
   100  	}
   101  
   102  	if isChunksQuery(queries[0]) || !s.disableBroadQueries {
   103  		return s.doBroadQueries(ctx, queries, callback)
   104  	}
   105  
   106  	return s.doQueries(ctx, queries, callback)
   107  }
   108  
// queryPages serves queries from the cache where possible and falls back to
// the wrapped Client for the rest, caching whatever the Client returns.
//
// buildQueryKey maps a query to its cache key; queries that share a key share
// one cached ReadBatch. buildIndexQuery maps a query to the query that is
// actually sent to the wrapped Client on a cache miss.
//
// A CardinalityExceededError is returned when a batch (cached or freshly
// read) has more entries than the tenant's cardinality limit, provided that
// limit is greater than zero. Errors from tenant resolution, the wrapped
// Client and the cache store are returned unchanged.
func (s *cachingIndexClient) queryPages(ctx context.Context, queries []Query, callback QueryPagesCallback,
	buildIndexQuery func(query Query) Query, buildQueryKey func(query Query) string,
) error {
	if len(queries) == 0 {
		return nil
	}

	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return err
	}
	cardinalityLimit := int32(s.limits.CardinalityLimit(userID))

	// Build list of keys to lookup in the cache.
	// Several queries may map to the same key, so remember all of them per key.
	keys := make([]string, 0, len(queries))
	queriesByKey := make(map[string][]Query, len(queries))
	for _, query := range queries {
		key := buildQueryKey(query)
		keys = append(keys, key)
		queriesByKey[key] = append(queriesByKey[key], query)
	}

	batches, misses := s.cacheFetch(ctx, keys)
	for _, batch := range batches {
		// Batches that exceeded the limit are cached with their entries
		// dropped and only Cardinality set, so the check uses that field.
		// NOTE(review): MetricName/LabelName are left empty in this error.
		if cardinalityLimit > 0 && batch.Cardinality > cardinalityLimit {
			return CardinalityExceededError{
				Size:  batch.Cardinality,
				Limit: cardinalityLimit,
			}
		}

		// Hand the cached batch to every query that shares this key.
		// NOTE(review): the bool returned by callback is ignored here.
		queries := queriesByKey[batch.Key]
		for _, query := range queries {
			callback(query, batch)
		}
	}

	if len(misses) == 0 {
		return nil
	}

	// Build list of cachable queries for the queries that missed the cache.
	var (
		resultsMtx      sync.Mutex
		results         = make(map[string]ReadBatch, len(misses))
		cacheableMissed = make([]Query, 0, len(misses))
		expiryTime      = time.Now().Add(s.validity)
	)

	for _, key := range misses {
		queries := queriesByKey[key]
		// queries with the same key would build same index query so just consider one of them
		cacheableMissed = append(cacheableMissed, buildIndexQuery(queries[0]))

		// Seed an empty batch per missed key so that keys with no results
		// are cached as well.
		rb := ReadBatch{
			Key:    key,
			Expiry: expiryTime.UnixNano(),
		}

		// If the query is cacheable forever, nil the expiry.
		if queries[0].Immutable {
			rb.Expiry = 0
		}

		results[key] = rb
	}

	// Collect every page returned by the wrapped Client into the batch for
	// its key. The mutex guards results inside the callback.
	err = s.Client.QueryPages(ctx, cacheableMissed, func(cacheableQuery Query, r ReadBatchResult) bool {
		resultsMtx.Lock()
		defer resultsMtx.Unlock()
		key := buildQueryKey(cacheableQuery)
		existing := results[key]
		for iter := r.Iterator(); iter.Next(); {
			existing.Entries = append(existing.Entries, CacheEntry{Column: iter.RangeValue(), Value: iter.Value()})
		}
		results[key] = existing
		return true
	})
	if err != nil {
		return err
	}

	{
		// The deferred unlock runs when queryPages returns, which is at the
		// end of this block.
		resultsMtx.Lock()
		defer resultsMtx.Unlock()
		keys := make([]string, 0, len(results))
		batches := make([]ReadBatch, 0, len(results))
		var cardinalityErr error
		for key, batch := range results {
			cardinality := int32(len(batch.Entries))
			if cardinalityLimit > 0 && cardinality > cardinalityLimit {
				// Cache only the size of an over-limit batch, not its entries.
				batch.Cardinality = cardinality
				batch.Entries = nil
				cardinalityErr = CardinalityExceededError{
					Size:  cardinality,
					Limit: cardinalityLimit,
				}
			}

			keys = append(keys, key)
			batches = append(batches, batch)
			// Once any batch exceeded the limit, the remaining batches are
			// still collected for caching but no longer passed to callback.
			if cardinalityErr != nil {
				continue
			}

			queries := queriesByKey[key]
			for _, query := range queries {
				callback(query, batch)
			}
		}

		// Store everything that was read; a cardinality error takes
		// precedence over an error from storing.
		err := s.cacheStore(ctx, keys, batches)
		if cardinalityErr != nil {
			return cardinalityErr
		}
		return err
	}
}
   227  
   228  // doBroadQueries does broad queries on the store by using just TableName and HashValue.
   229  // This is useful for chunks queries or when we need to reduce QPS on index store at the expense of higher cache requirement.
   230  // All the results from the index store are cached and the responses are filtered based on the actual queries.
   231  func (s *cachingIndexClient) doBroadQueries(ctx context.Context, queries []Query, callback QueryPagesCallback) error {
   232  	// We cache all the entries for queries looking for Chunk IDs, so filter client side.
   233  	callback = QueryFilter(callback)
   234  	return s.queryPages(ctx, queries, callback, func(query Query) Query {
   235  		return Query{TableName: query.TableName, HashValue: query.HashValue}
   236  	}, func(q Query) string {
   237  		return q.TableName + sep + q.HashValue
   238  	})
   239  }
   240  
   241  // doQueries does the exact same queries as opposed to doBroadQueries doing broad queries with limited query params.
   242  func (s *cachingIndexClient) doQueries(ctx context.Context, queries []Query, callback QueryPagesCallback) error {
   243  	return s.queryPages(ctx, queries, callback, func(query Query) Query {
   244  		return query
   245  	}, func(q Query) string {
   246  		ret := q.TableName + sep + q.HashValue
   247  
   248  		if len(q.RangeValuePrefix) != 0 {
   249  			ret += sep + yoloString(q.RangeValuePrefix)
   250  		}
   251  
   252  		if len(q.ValueEqual) != 0 {
   253  			ret += sep + yoloString(q.ValueEqual)
   254  		}
   255  
   256  		return ret
   257  	})
   258  }
   259  
   260  func yoloString(buf []byte) string {
   261  	return *((*string)(unsafe.Pointer(&buf)))
   262  }
   263  
   264  // Iterator implements chunk.ReadBatch.
   265  func (b ReadBatch) Iterator() ReadBatchIterator {
   266  	return &readBatchIterator{
   267  		index:     -1,
   268  		readBatch: b,
   269  	}
   270  }
   271  
   272  type readBatchIterator struct {
   273  	index     int
   274  	readBatch ReadBatch
   275  }
   276  
   277  // Len implements chunk.ReadBatchIterator.
   278  func (b *readBatchIterator) Next() bool {
   279  	b.index++
   280  	return b.index < len(b.readBatch.Entries)
   281  }
   282  
   283  // RangeValue implements chunk.ReadBatchIterator.
   284  func (b *readBatchIterator) RangeValue() []byte {
   285  	return b.readBatch.Entries[b.index].Column
   286  }
   287  
   288  // Value implements chunk.ReadBatchIterator.
   289  func (b *readBatchIterator) Value() []byte {
   290  	return b.readBatch.Entries[b.index].Value
   291  }
   292  
   293  func isChunksQuery(q Query) bool {
   294  	// RangeValueStart would only be set for chunks query.
   295  	return len(q.RangeValueStart) != 0
   296  }
   297  
   298  func (s *cachingIndexClient) cacheStore(ctx context.Context, keys []string, batches []ReadBatch) error {
   299  	logger := util_log.WithContext(ctx, s.logger)
   300  	cachePuts.Add(float64(len(keys)))
   301  
   302  	// We're doing the hashing to handle unicode and key len properly.
   303  	// Memcache fails for unicode keys and keys longer than 250 Bytes.
   304  	hashed := make([]string, 0, len(keys))
   305  	bufs := make([][]byte, 0, len(batches))
   306  	for i := range keys {
   307  		if len(batches[i].Entries) != 0 {
   308  			level.Debug(logger).Log("msg", "caching index entries", "key", keys[i], "count", len(batches[i].Entries))
   309  		}
   310  		hashed = append(hashed, cache.HashKey(keys[i]))
   311  		out, err := proto.Marshal(&batches[i])
   312  		if err != nil {
   313  			level.Warn(s.logger).Log("msg", "error marshalling ReadBatch", "err", err)
   314  			cacheEncodeErrs.Inc()
   315  			return err
   316  		}
   317  		bufs = append(bufs, out)
   318  	}
   319  
   320  	return s.cache.Store(ctx, hashed, bufs)
   321  }
   322  
   323  func (s *cachingIndexClient) cacheFetch(ctx context.Context, keys []string) (batches []ReadBatch, missed []string) {
   324  	cacheGets.Add(float64(len(keys)))
   325  
   326  	// Build a map from hash -> key; NB there can be collisions here; we'll fetch
   327  	// the last hash.
   328  	hashedKeys := make(map[string]string, len(keys))
   329  	for _, key := range keys {
   330  		hashedKeys[cache.HashKey(key)] = key
   331  	}
   332  
   333  	// Build a list of hashes; could be less than keys due to collisions.
   334  	hashes := make([]string, 0, len(keys))
   335  	for hash := range hashedKeys {
   336  		hashes = append(hashes, hash)
   337  	}
   338  
   339  	// Look up the hashes in a single batch.  If we get an error, we just "miss" all
   340  	// of the keys.  Eventually I want to push all the errors to the leafs of the cache
   341  	// tree, to the caches only return found & missed.
   342  	foundHashes, bufs, _, _ := s.cache.Fetch(ctx, hashes)
   343  
   344  	// Reverse the hash, unmarshal the index entries, check we got what we expected
   345  	// and that its still valid.
   346  	batches = make([]ReadBatch, 0, len(foundHashes))
   347  	for j, foundHash := range foundHashes {
   348  		key := hashedKeys[foundHash]
   349  		var readBatch ReadBatch
   350  
   351  		if err := proto.Unmarshal(bufs[j], &readBatch); err != nil {
   352  			level.Warn(util_log.Logger).Log("msg", "error unmarshalling index entry from cache", "err", err)
   353  			cacheCorruptErrs.Inc()
   354  			continue
   355  		}
   356  
   357  		// Make sure the hash(key) is not a collision in the cache by looking at the
   358  		// key in the value.
   359  		if key != readBatch.Key {
   360  			level.Debug(util_log.Logger).Log("msg", "dropping index cache entry due to key collision", "key", key, "readBatch.Key", readBatch.Key, "expiry")
   361  			continue
   362  		}
   363  
   364  		if readBatch.Expiry != 0 && time.Now().After(time.Unix(0, readBatch.Expiry)) {
   365  			continue
   366  		}
   367  
   368  		cacheHits.Inc()
   369  		batches = append(batches, readBatch)
   370  	}
   371  
   372  	// Finally work out what we're missing.
   373  	misses := make(map[string]struct{}, len(keys))
   374  	for _, key := range keys {
   375  		misses[key] = struct{}{}
   376  	}
   377  	for i := range batches {
   378  		delete(misses, batches[i].Key)
   379  	}
   380  	missed = make([]string, 0, len(misses))
   381  	for miss := range misses {
   382  		missed = append(missed, miss)
   383  	}
   384  
   385  	return batches, missed
   386  }