github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/postings_list_cache.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package index
    22  
    23  import (
    24  	"errors"
    25  	"math"
    26  	"time"
    27  
    28  	"github.com/m3db/m3/src/m3ninx/generated/proto/querypb"
    29  	"github.com/m3db/m3/src/m3ninx/postings"
    30  	"github.com/m3db/m3/src/m3ninx/search"
    31  	"github.com/m3db/m3/src/x/instrument"
    32  
    33  	"github.com/pborman/uuid"
    34  	"github.com/uber-go/tally"
    35  	"go.uber.org/zap"
    36  )
    37  
// errInstrumentOptions is the error returned by
// PostingsListCacheOptions.Validate when InstrumentOptions is nil.
var errInstrumentOptions = errors.New("no instrument options set")
    39  
    40  // PatternType is an enum for the various pattern types. It allows us
    41  // separate them logically within the cache.
    42  type PatternType string
    43  
    44  // Closer represents a function that will close managed resources.
    45  type Closer func()
    46  
    47  const (
    48  	// PatternTypeRegexp indicates that the pattern is of type regexp.
    49  	PatternTypeRegexp PatternType = "regexp"
    50  	// PatternTypeTerm indicates that the pattern is of type term.
    51  	PatternTypeTerm PatternType = "term"
    52  	// PatternTypeField indicates that the pattern is of type field.
    53  	PatternTypeField PatternType = "field"
    54  	// PatternTypeSearch indicates that the pattern is of type search.
    55  	PatternTypeSearch PatternType = "search"
    56  
    57  	reportLoopInterval = 10 * time.Second
    58  	emptyPattern       = ""
    59  )
    60  
// PostingsListCacheOptions is the options struct for the query cache.
type PostingsListCacheOptions struct {
	// InstrumentOptions supplies the metrics scope and logger used by the
	// cache. It is required: Validate returns an error when it is nil.
	InstrumentOptions instrument.Options
}
    65  
    66  // Validate will return an error if the options are not valid.
    67  func (o PostingsListCacheOptions) Validate() error {
    68  	if o.InstrumentOptions == nil {
    69  		return errInstrumentOptions
    70  	}
    71  	return nil
    72  }
    73  
// PostingsListCache implements an LRU for caching queries and their results.
type PostingsListCache struct {
	// lru is the underlying sharded LRU that stores the cached postings lists.
	lru *postingsListLRU

	// size is the capacity the cache was constructed with (also emitted as the
	// capacity gauge by Report), opts are the options supplied at construction
	// and metrics holds the hit/miss/put counters and gauges.
	size    int
	opts    PostingsListCacheOptions
	metrics *postingsListCacheMetrics

	// logger is taken from the instrument options at construction time.
	logger *zap.Logger
}
    84  
    85  // NewPostingsListCache creates a new query cache.
    86  func NewPostingsListCache(
    87  	size int,
    88  	opts PostingsListCacheOptions,
    89  ) (*PostingsListCache, error) {
    90  	err := opts.Validate()
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  
    95  	lru, err := newPostingsListLRU(postingsListLRUOptions{
    96  		size: size,
    97  		// Use ~1000 items per shard.
    98  		shards: int(math.Ceil(float64(size) / 1000)),
    99  	})
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  
   104  	plc := &PostingsListCache{
   105  		lru:     lru,
   106  		size:    size,
   107  		opts:    opts,
   108  		metrics: newPostingsListCacheMetrics(opts.InstrumentOptions.MetricsScope()),
   109  		logger:  opts.InstrumentOptions.Logger(),
   110  	}
   111  
   112  	return plc, nil
   113  }
   114  
   115  // Start the background report loop and return a Closer to cleanup.
   116  func (q *PostingsListCache) Start() Closer {
   117  	return q.startReportLoop()
   118  }
   119  
   120  // GetRegexp returns the cached results for the provided regexp query, if any.
   121  func (q *PostingsListCache) GetRegexp(
   122  	segmentUUID uuid.UUID,
   123  	field string,
   124  	pattern string,
   125  ) (postings.List, bool) {
   126  	return q.get(segmentUUID, field, pattern, PatternTypeRegexp)
   127  }
   128  
   129  // GetTerm returns the cached results for the provided term query, if any.
   130  func (q *PostingsListCache) GetTerm(
   131  	segmentUUID uuid.UUID,
   132  	field string,
   133  	pattern string,
   134  ) (postings.List, bool) {
   135  	return q.get(segmentUUID, field, pattern, PatternTypeTerm)
   136  }
   137  
   138  // GetField returns the cached results for the provided field query, if any.
   139  func (q *PostingsListCache) GetField(
   140  	segmentUUID uuid.UUID,
   141  	field string,
   142  ) (postings.List, bool) {
   143  	return q.get(segmentUUID, field, emptyPattern, PatternTypeField)
   144  }
   145  
   146  // GetSearch returns the cached results for the provided search query, if any.
   147  func (q *PostingsListCache) GetSearch(
   148  	segmentUUID uuid.UUID,
   149  	query string,
   150  ) (postings.List, bool) {
   151  	return q.get(segmentUUID, query, emptyPattern, PatternTypeSearch)
   152  }
   153  
   154  func (q *PostingsListCache) get(
   155  	segmentUUID uuid.UUID,
   156  	field string,
   157  	pattern string,
   158  	patternType PatternType,
   159  ) (postings.List, bool) {
   160  	entry, ok := q.lru.Get(segmentUUID, field, pattern, patternType)
   161  	q.emitCacheGetMetrics(patternType, ok)
   162  	if !ok {
   163  		return nil, false
   164  	}
   165  
   166  	return entry.postings, ok
   167  }
   168  
// cachedPostings is the value stored in the LRU for a single cache entry. It
// carries a copy of the key components alongside the cached postings list.
type cachedPostings struct {
	// key: the same components that are passed to the LRU when adding the
	// entry.
	segmentUUID uuid.UUID
	field       string
	pattern     string
	patternType PatternType

	// value: the cached postings list.
	postings postings.List
	// searchQuery is only set for search queries.
	searchQuery *querypb.Query
}
   181  
   182  // PutRegexp updates the LRU with the result of the regexp query.
   183  func (q *PostingsListCache) PutRegexp(
   184  	segmentUUID uuid.UUID,
   185  	field string,
   186  	pattern string,
   187  	pl postings.List,
   188  ) {
   189  	q.put(segmentUUID, field, pattern, PatternTypeRegexp, nil, pl)
   190  }
   191  
   192  // PutTerm updates the LRU with the result of the term query.
   193  func (q *PostingsListCache) PutTerm(
   194  	segmentUUID uuid.UUID,
   195  	field string,
   196  	pattern string,
   197  	pl postings.List,
   198  ) {
   199  	q.put(segmentUUID, field, pattern, PatternTypeTerm, nil, pl)
   200  }
   201  
   202  // PutField updates the LRU with the result of the field query.
   203  func (q *PostingsListCache) PutField(
   204  	segmentUUID uuid.UUID,
   205  	field string,
   206  	pl postings.List,
   207  ) {
   208  	q.put(segmentUUID, field, emptyPattern, PatternTypeField, nil, pl)
   209  }
   210  
   211  // PutSearch updates the LRU with the result of a search query.
   212  func (q *PostingsListCache) PutSearch(
   213  	segmentUUID uuid.UUID,
   214  	queryStr string,
   215  	query search.Query,
   216  	pl postings.List,
   217  ) {
   218  	q.put(segmentUUID, queryStr, emptyPattern, PatternTypeSearch, query, pl)
   219  }
   220  
   221  func (q *PostingsListCache) put(
   222  	segmentUUID uuid.UUID,
   223  	field string,
   224  	pattern string,
   225  	patternType PatternType,
   226  	searchQuery search.Query,
   227  	pl postings.List,
   228  ) {
   229  	var searchQueryProto *querypb.Query
   230  	if searchQuery != nil {
   231  		searchQueryProto = searchQuery.ToProto()
   232  	}
   233  
   234  	value := &cachedPostings{
   235  		segmentUUID: segmentUUID,
   236  		field:       field,
   237  		pattern:     pattern,
   238  		patternType: patternType,
   239  		searchQuery: searchQueryProto,
   240  		postings:    pl,
   241  	}
   242  	q.lru.Add(segmentUUID, field, pattern, patternType, value)
   243  
   244  	q.emitCachePutMetrics(patternType)
   245  }
   246  
// PurgeSegment removes all postings lists associated with the specified
// segment from the cache. The work is delegated to the underlying LRU.
func (q *PostingsListCache) PurgeSegment(segmentUUID uuid.UUID) {
	q.lru.PurgeSegment(segmentUUID)
}
   252  
   253  // startReportLoop starts a background process that will call Report()
   254  // on a regular basis and returns a function that will end the background
   255  // process.
   256  func (q *PostingsListCache) startReportLoop() Closer {
   257  	doneCh := make(chan struct{})
   258  
   259  	go func() {
   260  		for {
   261  			select {
   262  			case <-doneCh:
   263  				return
   264  			default:
   265  			}
   266  
   267  			q.Report()
   268  			time.Sleep(reportLoopInterval)
   269  		}
   270  	}()
   271  
   272  	return func() { close(doneCh) }
   273  }
   274  
// CachedPattern defines a cached pattern: a single cache entry as handed to
// a CachedPatternForEachFn. CacheKey is the key the entry is stored under,
// SearchQuery is the stored search query (only set for search entries) and
// Postings is the cached postings list.
type CachedPattern struct {
	CacheKey    PostingsListCacheKey
	SearchQuery *querypb.Query
	Postings    postings.List
}
   281  
// CachedPatternsResult summarizes a CachedPatterns call. InRegistry is true
// when at least one shard holds entries for the requested segment,
// TotalPatterns is the number of entries found for the segment across the
// shards, and MatchedPatterns is the number of those entries that passed the
// query filter and were handed to the callback.
type CachedPatternsResult struct {
	InRegistry      bool
	TotalPatterns   int
	MatchedPatterns int
}
   288  
// CachedPatternForEachFn defines a function for iterating cached patterns; it
// is called once for every cached pattern that matches the query.
type CachedPatternForEachFn func(CachedPattern)
   291  
// CachedPatternsQuery defines a cached pattern query.
type CachedPatternsQuery struct {
	// PatternType, when non-nil, restricts the matched patterns to those of
	// the given type. A nil value matches every pattern type.
	PatternType *PatternType
}
   296  
   297  // CachedPatterns returns cached patterns for given query.
   298  func (q *PostingsListCache) CachedPatterns(
   299  	uuid uuid.UUID,
   300  	query CachedPatternsQuery,
   301  	fn CachedPatternForEachFn,
   302  ) CachedPatternsResult {
   303  	var result CachedPatternsResult
   304  
   305  	for _, shard := range q.lru.shards {
   306  		shard.RLock()
   307  		result = shardCachedPatternsWithRLock(uuid, query, fn, shard, result)
   308  		shard.RUnlock()
   309  	}
   310  
   311  	return result
   312  }
   313  
   314  func shardCachedPatternsWithRLock(
   315  	uuid uuid.UUID,
   316  	query CachedPatternsQuery,
   317  	fn CachedPatternForEachFn,
   318  	shard *postingsListLRUShard,
   319  	result CachedPatternsResult,
   320  ) CachedPatternsResult {
   321  	segmentPostings, ok := shard.items[uuid.Array()]
   322  	if !ok {
   323  		return result
   324  	}
   325  
   326  	result.InRegistry = true
   327  	result.TotalPatterns += len(segmentPostings)
   328  	for key, value := range segmentPostings {
   329  		if v := query.PatternType; v != nil && *v != key.PatternType {
   330  			continue
   331  		}
   332  
   333  		fn(CachedPattern{
   334  			CacheKey:    key,
   335  			SearchQuery: value.Value.(*entry).cachedPostings.searchQuery,
   336  			Postings:    value.Value.(*entry).cachedPostings.postings,
   337  		})
   338  		result.MatchedPatterns++
   339  	}
   340  
   341  	return result
   342  }
   343  
   344  // Report will emit metrics about the status of the cache.
   345  func (q *PostingsListCache) Report() {
   346  	q.metrics.capacity.Update(float64(q.size))
   347  }
   348  
   349  func (q *PostingsListCache) emitCacheGetMetrics(patternType PatternType, hit bool) {
   350  	var method *postingsListCacheMethodMetrics
   351  	switch patternType {
   352  	case PatternTypeRegexp:
   353  		method = q.metrics.regexp
   354  	case PatternTypeTerm:
   355  		method = q.metrics.term
   356  	case PatternTypeField:
   357  		method = q.metrics.field
   358  	case PatternTypeSearch:
   359  		method = q.metrics.search
   360  	default:
   361  		method = q.metrics.unknown // should never happen
   362  	}
   363  	if hit {
   364  		method.hits.Inc(1)
   365  	} else {
   366  		method.misses.Inc(1)
   367  	}
   368  }
   369  
   370  func (q *PostingsListCache) emitCachePutMetrics(patternType PatternType) {
   371  	switch patternType {
   372  	case PatternTypeRegexp:
   373  		q.metrics.regexp.puts.Inc(1)
   374  	case PatternTypeTerm:
   375  		q.metrics.term.puts.Inc(1)
   376  	case PatternTypeField:
   377  		q.metrics.field.puts.Inc(1)
   378  	case PatternTypeSearch:
   379  		q.metrics.search.puts.Inc(1)
   380  	default:
   381  		q.metrics.unknown.puts.Inc(1) // should never happen
   382  	}
   383  }
   384  
// postingsListCacheMetrics groups every metric emitted by the postings list
// cache.
type postingsListCacheMetrics struct {
	// Per query type hit/miss/put counters; unknown is the fallback used for
	// unrecognized pattern types.
	regexp  *postingsListCacheMethodMetrics
	term    *postingsListCacheMethodMetrics
	field   *postingsListCacheMethodMetrics
	search  *postingsListCacheMethodMetrics
	unknown *postingsListCacheMethodMetrics

	// Gauges registered as "size" and "capacity".
	size     tally.Gauge
	capacity tally.Gauge

	// Counters registered as "pooled_get" and "pooled_put", plus their
	// "pooled_get_error" and "pooled_put_error" counterparts tagged by error
	// type.
	pooledGet              tally.Counter
	pooledGetErrAddIter    tally.Counter
	pooledPut              tally.Counter
	pooledPutErrNotMutable tally.Counter
}
   400  
   401  func newPostingsListCacheMetrics(scope tally.Scope) *postingsListCacheMetrics {
   402  	return &postingsListCacheMetrics{
   403  		regexp: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{
   404  			"query_type": "regexp",
   405  		})),
   406  		term: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{
   407  			"query_type": "term",
   408  		})),
   409  		field: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{
   410  			"query_type": "field",
   411  		})),
   412  		search: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{
   413  			"query_type": "search",
   414  		})),
   415  		unknown: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{
   416  			"query_type": "unknown",
   417  		})),
   418  		size:      scope.Gauge("size"),
   419  		capacity:  scope.Gauge("capacity"),
   420  		pooledGet: scope.Counter("pooled_get"),
   421  		pooledGetErrAddIter: scope.Tagged(map[string]string{
   422  			"error_type": "add_iter",
   423  		}).Counter("pooled_get_error"),
   424  		pooledPut: scope.Counter("pooled_put"),
   425  		pooledPutErrNotMutable: scope.Tagged(map[string]string{
   426  			"error_type": "not_mutable",
   427  		}).Counter("pooled_put_error"),
   428  	}
   429  }
   430  
// postingsListCacheMethodMetrics holds the counters tracked for one query
// type: cache hits, cache misses and puts.
type postingsListCacheMethodMetrics struct {
	hits   tally.Counter
	misses tally.Counter
	puts   tally.Counter
}
   436  
   437  func newPostingsListCacheMethodMetrics(scope tally.Scope) *postingsListCacheMethodMetrics {
   438  	return &postingsListCacheMethodMetrics{
   439  		hits:   scope.Counter("hits"),
   440  		misses: scope.Counter("misses"),
   441  		puts:   scope.Counter("puts"),
   442  	}
   443  }