github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/postings_list_cache_lru.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package index
    22  
    23  import (
    24  	"container/list"
    25  	"errors"
    26  	"math"
    27  	"sync"
    28  
    29  	"github.com/cespare/xxhash/v2"
    30  	"github.com/pborman/uuid"
    31  )
    32  
// postingsListLRU implements a fixed size, sharded LRU cache of postings lists
// that were resolved by running a given query against a particular segment for a given
// field and pattern type (term vs regexp). Access is synchronized per shard: the
// postingsListLRUShard methods Add, Get, Remove, PurgeSegment and Len each acquire
// that shard's embedded lock. Normally a key in the LRU would look like:
//
// type key struct {
//    segmentUUID uuid.UUID
//    field       string
//    pattern     string
//    patternType PatternType
// }
//
// However, some of the postings lists that we will store in the LRU have a fixed lifecycle
// because they reference mmap'd byte slices which will eventually be unmap'd. To prevent
// these postings lists that point to unmap'd regions from remaining in the LRU, we want to
// support the ability to efficiently purge the LRU of any postings lists that belong to a
// given segment. This isn't technically required for correctness as once a segment has been
// closed, its old postings lists in the LRU will never be accessed again (since they are only
// addressable by that segment's UUID), but we purge them from the LRU before closing the segment
// anyways as an additional safety precaution.
//
// Instead of adding additional tracking on top of an existing generic LRU, we've created a
// specialized LRU that instead of having a single top-level map pointing into the linked-list,
// has a two-level map where the top level map is keyed by segment UUID and the second level map
// is keyed by the field/pattern/patternType.
//
// As a result, when a segment is ready to be closed, it can call into the cache with its
// UUID and we can efficiently remove all the entries corresponding to that segment from the
// LRU. The specialization has the additional nice property that we don't need to allocate every
// time we add an item to the LRU due to the interface{} conversion.
//
// Fields:
//   - shards holds one independent LRU per shard; a key is routed to a shard by hashing
//     the segment UUID, field, pattern and pattern type.
//   - numShards is len(shards) stored as a uint64 so it can be used directly as the
//     modulus of the 64-bit key hash.
type postingsListLRU struct {
	shards    []*postingsListLRUShard
	numShards uint64
}
    66  
// postingsListLRUShard is one partition of the sharded LRU. Its methods take the
// embedded RWMutex before touching the fields below.
//
// Fields:
//   - size is the entry count above which Add evicts the oldest entry.
//   - evictList orders entries by recency: hits and inserts go to the front and
//     evictions are taken from the back.
//   - items indexes the elements of evictList, first by segment UUID and then by
//     PostingsListCacheKey, so all entries of one segment can be found together.
type postingsListLRUShard struct {
	sync.RWMutex
	size      int
	evictList *list.List
	items     map[uuid.Array]map[PostingsListCacheKey]*list.Element
}
    73  
// entry is the value stored in each evictList element. Besides the cached
// postings it carries the segment UUID and cache key it was stored under, so
// that removing a list element can also delete the matching map entries.
type entry struct {
	uuid           uuid.UUID
	key            PostingsListCacheKey
	cachedPostings *cachedPostings
}
    80  
// PostingsListCacheKey is a postings list cache key. It identifies a cached
// postings list within a single segment by the queried field, the pattern that
// was matched against it, and the type of that pattern. It is used as the key of
// the second-level map in each LRU shard.
type PostingsListCacheKey struct {
	Field       string
	Pattern     string
	PatternType PatternType
}
    87  
// postingsListLRUOptions configures newPostingsListLRU. Both values must be
// positive.
//
// Fields:
//   - size is the requested total number of entries; it is divided among the
//     shards, rounding up.
//   - shards is the number of independently locked shards to create.
type postingsListLRUOptions struct {
	size   int
	shards int
}
    92  
    93  // newPostingsListLRU constructs an LRU of the given size.
    94  func newPostingsListLRU(opts postingsListLRUOptions) (*postingsListLRU, error) {
    95  	size, shards := opts.size, opts.shards
    96  	if size <= 0 {
    97  		return nil, errors.New("must provide a positive size")
    98  	}
    99  	if shards <= 0 {
   100  		return nil, errors.New("must provide a positive shards")
   101  	}
   102  
   103  	lruShards := make([]*postingsListLRUShard, 0, shards)
   104  	for i := 0; i < shards; i++ {
   105  		lruShard := newPostingsListLRUShard(int(math.Ceil(float64(size) / float64(shards))))
   106  		lruShards = append(lruShards, lruShard)
   107  	}
   108  
   109  	return &postingsListLRU{
   110  		shards:    lruShards,
   111  		numShards: uint64(len(lruShards)),
   112  	}, nil
   113  }
   114  
   115  // newPostingsListLRU constructs an LRU of the given size.
   116  func newPostingsListLRUShard(size int) *postingsListLRUShard {
   117  	return &postingsListLRUShard{
   118  		size:      size,
   119  		evictList: list.New(),
   120  		items:     make(map[uuid.Array]map[PostingsListCacheKey]*list.Element),
   121  	}
   122  }
   123  
   124  func (c *postingsListLRU) shard(
   125  	segmentUUID uuid.UUID,
   126  	field, pattern string,
   127  	patternType PatternType,
   128  ) *postingsListLRUShard {
   129  	idx := hashKey(segmentUUID, field, pattern, patternType) % c.numShards
   130  	return c.shards[idx]
   131  }
   132  
   133  func (c *postingsListLRU) Add(
   134  	segmentUUID uuid.UUID,
   135  	field string,
   136  	pattern string,
   137  	patternType PatternType,
   138  	cachedPostings *cachedPostings,
   139  ) bool {
   140  	shard := c.shard(segmentUUID, field, pattern, patternType)
   141  	return shard.Add(segmentUUID, field, pattern, patternType, cachedPostings)
   142  }
   143  
   144  func (c *postingsListLRU) Get(
   145  	segmentUUID uuid.UUID,
   146  	field string,
   147  	pattern string,
   148  	patternType PatternType,
   149  ) (*cachedPostings, bool) {
   150  	shard := c.shard(segmentUUID, field, pattern, patternType)
   151  	return shard.Get(segmentUUID, field, pattern, patternType)
   152  }
   153  
   154  func (c *postingsListLRU) Remove(
   155  	segmentUUID uuid.UUID,
   156  	field string,
   157  	pattern string,
   158  	patternType PatternType,
   159  ) bool {
   160  	shard := c.shard(segmentUUID, field, pattern, patternType)
   161  	return shard.Remove(segmentUUID, field, pattern, patternType)
   162  }
   163  
   164  func (c *postingsListLRU) PurgeSegment(segmentUUID uuid.UUID) {
   165  	for _, shard := range c.shards {
   166  		shard.PurgeSegment(segmentUUID)
   167  	}
   168  }
   169  
   170  func (c *postingsListLRU) Len() int {
   171  	n := 0
   172  	for _, shard := range c.shards {
   173  		n += shard.Len()
   174  	}
   175  	return n
   176  }
   177  
   178  // Add adds a value to the cache. Returns true if an eviction occurred.
   179  func (c *postingsListLRUShard) Add(
   180  	segmentUUID uuid.UUID,
   181  	field string,
   182  	pattern string,
   183  	patternType PatternType,
   184  	cachedPostings *cachedPostings,
   185  ) (evicted bool) {
   186  	c.Lock()
   187  	defer c.Unlock()
   188  
   189  	newKey := newKey(field, pattern, patternType)
   190  	// Check for existing item.
   191  	uuidArray := segmentUUID.Array()
   192  	if uuidEntries, ok := c.items[uuidArray]; ok {
   193  		if ent, ok := uuidEntries[newKey]; ok {
   194  			// If it already exists, just move it to the front. This avoids storing
   195  			// the same item in the LRU twice which is important because the maps
   196  			// can only point to one entry at a time and we use them for purges. Also,
   197  			// it saves space by avoiding storing duplicate values.
   198  			c.evictList.MoveToFront(ent)
   199  			ent.Value.(*entry).cachedPostings = cachedPostings
   200  			return false
   201  		}
   202  	}
   203  
   204  	// Add new item.
   205  	var (
   206  		ent = &entry{
   207  			uuid:           segmentUUID,
   208  			key:            newKey,
   209  			cachedPostings: cachedPostings,
   210  		}
   211  		entry = c.evictList.PushFront(ent)
   212  	)
   213  	if queries, ok := c.items[uuidArray]; ok {
   214  		queries[newKey] = entry
   215  	} else {
   216  		c.items[uuidArray] = map[PostingsListCacheKey]*list.Element{
   217  			newKey: entry,
   218  		}
   219  	}
   220  
   221  	evict := c.evictList.Len() > c.size
   222  	// Verify size not exceeded.
   223  	if evict {
   224  		c.removeOldest()
   225  	}
   226  	return evict
   227  }
   228  
   229  // Get looks up a key's value from the cache.
   230  func (c *postingsListLRUShard) Get(
   231  	segmentUUID uuid.UUID,
   232  	field string,
   233  	pattern string,
   234  	patternType PatternType,
   235  ) (*cachedPostings, bool) {
   236  	c.Lock()
   237  	defer c.Unlock()
   238  
   239  	newKey := newKey(field, pattern, patternType)
   240  	uuidArray := segmentUUID.Array()
   241  
   242  	uuidEntries, ok := c.items[uuidArray]
   243  	if !ok {
   244  		return nil, false
   245  	}
   246  
   247  	ent, ok := uuidEntries[newKey]
   248  	if !ok {
   249  		return nil, false
   250  	}
   251  
   252  	c.evictList.MoveToFront(ent)
   253  	return ent.Value.(*entry).cachedPostings, true
   254  }
   255  
   256  // Remove removes the provided key from the cache, returning if the
   257  // key was contained.
   258  func (c *postingsListLRUShard) Remove(
   259  	segmentUUID uuid.UUID,
   260  	field string,
   261  	pattern string,
   262  	patternType PatternType,
   263  ) bool {
   264  	c.Lock()
   265  	defer c.Unlock()
   266  
   267  	newKey := newKey(field, pattern, patternType)
   268  	uuidArray := segmentUUID.Array()
   269  	if uuidEntries, ok := c.items[uuidArray]; ok {
   270  		if ent, ok := uuidEntries[newKey]; ok {
   271  			c.removeElement(ent)
   272  			return true
   273  		}
   274  	}
   275  
   276  	return false
   277  }
   278  
   279  func (c *postingsListLRUShard) PurgeSegment(segmentUUID uuid.UUID) {
   280  	c.Lock()
   281  	defer c.Unlock()
   282  
   283  	if uuidEntries, ok := c.items[segmentUUID.Array()]; ok {
   284  		for _, ent := range uuidEntries {
   285  			c.removeElement(ent)
   286  		}
   287  	}
   288  }
   289  
   290  // Len returns the number of items in the cache.
   291  func (c *postingsListLRUShard) Len() int {
   292  	c.RLock()
   293  	defer c.RUnlock()
   294  	return c.evictList.Len()
   295  }
   296  
   297  // removeOldest removes the oldest item from the cache.
   298  func (c *postingsListLRUShard) removeOldest() {
   299  	ent := c.evictList.Back()
   300  	if ent != nil {
   301  		c.removeElement(ent)
   302  	}
   303  }
   304  
   305  // removeElement is used to remove a given list element from the cache
   306  func (c *postingsListLRUShard) removeElement(e *list.Element) {
   307  	c.evictList.Remove(e)
   308  	entry := e.Value.(*entry)
   309  
   310  	if patterns, ok := c.items[entry.uuid.Array()]; ok {
   311  		delete(patterns, entry.key)
   312  		if len(patterns) == 0 {
   313  			delete(c.items, entry.uuid.Array())
   314  		}
   315  	}
   316  }
   317  
   318  func newKey(field, pattern string, patternType PatternType) PostingsListCacheKey {
   319  	return PostingsListCacheKey{
   320  		Field:       field,
   321  		Pattern:     pattern,
   322  		PatternType: patternType,
   323  	}
   324  }
   325  
   326  func hashKey(
   327  	segmentUUID uuid.UUID,
   328  	field string,
   329  	pattern string,
   330  	patternType PatternType,
   331  ) uint64 {
   332  	var h xxhash.Digest
   333  	h.Reset()
   334  	_, _ = h.Write(segmentUUID)
   335  	_, _ = h.WriteString(field)
   336  	_, _ = h.WriteString(pattern)
   337  	_, _ = h.WriteString(string(patternType))
   338  	return h.Sum64()
   339  }