github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/postings_list_cache_lru.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package index 22 23 import ( 24 "container/list" 25 "errors" 26 "math" 27 "sync" 28 29 "github.com/cespare/xxhash/v2" 30 "github.com/pborman/uuid" 31 ) 32 33 // postingsListLRU implements a thread-safe (sharded, one lock per shard) fixed size LRU cache of postings lists 34 // that were resolved by running a given query against a particular segment for a given 35 // field and pattern type (term vs regexp). 
Normally a key in the LRU would look like: 36 // 37 // type key struct { 38 // segmentUUID uuid.UUID 39 // field string 40 // pattern string 41 // patternType PatternType 42 // } 43 // 44 // However, some of the postings lists that we will store in the LRU have a fixed lifecycle 45 // because they reference mmap'd byte slices which will eventually be unmap'd. To prevent 46 // these postings lists that point to unmap'd regions from remaining in the LRU, we want to 47 // support the ability to efficiently purge the LRU of any postings list that belong to a 48 // given segment. This isn't technically required for correctness as once a segment has been 49 // closed, its old postings list in the LRU will never be accessed again (since they are only 50 // addressable by that segments UUID), but we purge them from the LRU before closing the segment 51 // anyways as an additional safety precaution. 52 // 53 // Instead of adding additional tracking on-top of an existing generic LRU, we've created a 54 // specialized LRU that instead of having a single top-level map pointing into the linked-list, 55 // has a two-level map where the top level map is keyed by segment UUID and the second level map 56 // is keyed by the field/pattern/patternType. 57 // 58 // As a result, when a segment is ready to be closed, they can call into the cache with their 59 // UUID and we can efficiently remove all the entries corresponding to that segment from the 60 // LRU. The specialization has the additional nice property that we don't need to allocate everytime 61 // we add an item to the LRU due to the interface{} conversion. 62 type postingsListLRU struct { 63 shards []*postingsListLRUShard 64 numShards uint64 65 } 66 67 type postingsListLRUShard struct { 68 sync.RWMutex 69 size int 70 evictList *list.List 71 items map[uuid.Array]map[PostingsListCacheKey]*list.Element 72 } 73 74 // entry is used to hold a value in the evictList. 
75 type entry struct { 76 uuid uuid.UUID 77 key PostingsListCacheKey 78 cachedPostings *cachedPostings 79 } 80 81 // PostingsListCacheKey is a postings list cache key. 82 type PostingsListCacheKey struct { 83 Field string 84 Pattern string 85 PatternType PatternType 86 } 87 88 type postingsListLRUOptions struct { 89 size int 90 shards int 91 } 92 93 // newPostingsListLRU constructs an LRU of the given size. 94 func newPostingsListLRU(opts postingsListLRUOptions) (*postingsListLRU, error) { 95 size, shards := opts.size, opts.shards 96 if size <= 0 { 97 return nil, errors.New("must provide a positive size") 98 } 99 if shards <= 0 { 100 return nil, errors.New("must provide a positive shards") 101 } 102 103 lruShards := make([]*postingsListLRUShard, 0, shards) 104 for i := 0; i < shards; i++ { 105 lruShard := newPostingsListLRUShard(int(math.Ceil(float64(size) / float64(shards)))) 106 lruShards = append(lruShards, lruShard) 107 } 108 109 return &postingsListLRU{ 110 shards: lruShards, 111 numShards: uint64(len(lruShards)), 112 }, nil 113 } 114 115 // newPostingsListLRU constructs an LRU of the given size. 
116 func newPostingsListLRUShard(size int) *postingsListLRUShard { 117 return &postingsListLRUShard{ 118 size: size, 119 evictList: list.New(), 120 items: make(map[uuid.Array]map[PostingsListCacheKey]*list.Element), 121 } 122 } 123 124 func (c *postingsListLRU) shard( 125 segmentUUID uuid.UUID, 126 field, pattern string, 127 patternType PatternType, 128 ) *postingsListLRUShard { 129 idx := hashKey(segmentUUID, field, pattern, patternType) % c.numShards 130 return c.shards[idx] 131 } 132 133 func (c *postingsListLRU) Add( 134 segmentUUID uuid.UUID, 135 field string, 136 pattern string, 137 patternType PatternType, 138 cachedPostings *cachedPostings, 139 ) bool { 140 shard := c.shard(segmentUUID, field, pattern, patternType) 141 return shard.Add(segmentUUID, field, pattern, patternType, cachedPostings) 142 } 143 144 func (c *postingsListLRU) Get( 145 segmentUUID uuid.UUID, 146 field string, 147 pattern string, 148 patternType PatternType, 149 ) (*cachedPostings, bool) { 150 shard := c.shard(segmentUUID, field, pattern, patternType) 151 return shard.Get(segmentUUID, field, pattern, patternType) 152 } 153 154 func (c *postingsListLRU) Remove( 155 segmentUUID uuid.UUID, 156 field string, 157 pattern string, 158 patternType PatternType, 159 ) bool { 160 shard := c.shard(segmentUUID, field, pattern, patternType) 161 return shard.Remove(segmentUUID, field, pattern, patternType) 162 } 163 164 func (c *postingsListLRU) PurgeSegment(segmentUUID uuid.UUID) { 165 for _, shard := range c.shards { 166 shard.PurgeSegment(segmentUUID) 167 } 168 } 169 170 func (c *postingsListLRU) Len() int { 171 n := 0 172 for _, shard := range c.shards { 173 n += shard.Len() 174 } 175 return n 176 } 177 178 // Add adds a value to the cache. Returns true if an eviction occurred. 
179 func (c *postingsListLRUShard) Add( 180 segmentUUID uuid.UUID, 181 field string, 182 pattern string, 183 patternType PatternType, 184 cachedPostings *cachedPostings, 185 ) (evicted bool) { 186 c.Lock() 187 defer c.Unlock() 188 189 newKey := newKey(field, pattern, patternType) 190 // Check for existing item. 191 uuidArray := segmentUUID.Array() 192 if uuidEntries, ok := c.items[uuidArray]; ok { 193 if ent, ok := uuidEntries[newKey]; ok { 194 // If it already exists, just move it to the front. This avoids storing 195 // the same item in the LRU twice which is important because the maps 196 // can only point to one entry at a time and we use them for purges. Also, 197 // it saves space by avoiding storing duplicate values. 198 c.evictList.MoveToFront(ent) 199 ent.Value.(*entry).cachedPostings = cachedPostings 200 return false 201 } 202 } 203 204 // Add new item. 205 var ( 206 ent = &entry{ 207 uuid: segmentUUID, 208 key: newKey, 209 cachedPostings: cachedPostings, 210 } 211 entry = c.evictList.PushFront(ent) 212 ) 213 if queries, ok := c.items[uuidArray]; ok { 214 queries[newKey] = entry 215 } else { 216 c.items[uuidArray] = map[PostingsListCacheKey]*list.Element{ 217 newKey: entry, 218 } 219 } 220 221 evict := c.evictList.Len() > c.size 222 // Verify size not exceeded. 223 if evict { 224 c.removeOldest() 225 } 226 return evict 227 } 228 229 // Get looks up a key's value from the cache. 
230 func (c *postingsListLRUShard) Get( 231 segmentUUID uuid.UUID, 232 field string, 233 pattern string, 234 patternType PatternType, 235 ) (*cachedPostings, bool) { 236 c.Lock() 237 defer c.Unlock() 238 239 newKey := newKey(field, pattern, patternType) 240 uuidArray := segmentUUID.Array() 241 242 uuidEntries, ok := c.items[uuidArray] 243 if !ok { 244 return nil, false 245 } 246 247 ent, ok := uuidEntries[newKey] 248 if !ok { 249 return nil, false 250 } 251 252 c.evictList.MoveToFront(ent) 253 return ent.Value.(*entry).cachedPostings, true 254 } 255 256 // Remove removes the provided key from the cache, returning if the 257 // key was contained. 258 func (c *postingsListLRUShard) Remove( 259 segmentUUID uuid.UUID, 260 field string, 261 pattern string, 262 patternType PatternType, 263 ) bool { 264 c.Lock() 265 defer c.Unlock() 266 267 newKey := newKey(field, pattern, patternType) 268 uuidArray := segmentUUID.Array() 269 if uuidEntries, ok := c.items[uuidArray]; ok { 270 if ent, ok := uuidEntries[newKey]; ok { 271 c.removeElement(ent) 272 return true 273 } 274 } 275 276 return false 277 } 278 279 func (c *postingsListLRUShard) PurgeSegment(segmentUUID uuid.UUID) { 280 c.Lock() 281 defer c.Unlock() 282 283 if uuidEntries, ok := c.items[segmentUUID.Array()]; ok { 284 for _, ent := range uuidEntries { 285 c.removeElement(ent) 286 } 287 } 288 } 289 290 // Len returns the number of items in the cache. 291 func (c *postingsListLRUShard) Len() int { 292 c.RLock() 293 defer c.RUnlock() 294 return c.evictList.Len() 295 } 296 297 // removeOldest removes the oldest item from the cache. 
298 func (c *postingsListLRUShard) removeOldest() { 299 ent := c.evictList.Back() 300 if ent != nil { 301 c.removeElement(ent) 302 } 303 } 304 305 // removeElement is used to remove a given list element from the cache 306 func (c *postingsListLRUShard) removeElement(e *list.Element) { 307 c.evictList.Remove(e) 308 entry := e.Value.(*entry) 309 310 if patterns, ok := c.items[entry.uuid.Array()]; ok { 311 delete(patterns, entry.key) 312 if len(patterns) == 0 { 313 delete(c.items, entry.uuid.Array()) 314 } 315 } 316 } 317 318 func newKey(field, pattern string, patternType PatternType) PostingsListCacheKey { 319 return PostingsListCacheKey{ 320 Field: field, 321 Pattern: pattern, 322 PatternType: patternType, 323 } 324 } 325 326 func hashKey( 327 segmentUUID uuid.UUID, 328 field string, 329 pattern string, 330 patternType PatternType, 331 ) uint64 { 332 var h xxhash.Digest 333 h.Reset() 334 _, _ = h.Write(segmentUUID) 335 _, _ = h.WriteString(field) 336 _, _ = h.WriteString(pattern) 337 _, _ = h.WriteString(string(patternType)) 338 return h.Sum64() 339 }