github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/postings_list_cache.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package index 22 23 import ( 24 "errors" 25 "math" 26 "time" 27 28 "github.com/m3db/m3/src/m3ninx/generated/proto/querypb" 29 "github.com/m3db/m3/src/m3ninx/postings" 30 "github.com/m3db/m3/src/m3ninx/search" 31 "github.com/m3db/m3/src/x/instrument" 32 33 "github.com/pborman/uuid" 34 "github.com/uber-go/tally" 35 "go.uber.org/zap" 36 ) 37 38 var errInstrumentOptions = errors.New("no instrument options set") 39 40 // PatternType is an enum for the various pattern types. It allows us 41 // separate them logically within the cache. 42 type PatternType string 43 44 // Closer represents a function that will close managed resources. 45 type Closer func() 46 47 const ( 48 // PatternTypeRegexp indicates that the pattern is of type regexp. 49 PatternTypeRegexp PatternType = "regexp" 50 // PatternTypeTerm indicates that the pattern is of type term. 51 PatternTypeTerm PatternType = "term" 52 // PatternTypeField indicates that the pattern is of type field. 53 PatternTypeField PatternType = "field" 54 // PatternTypeSearch indicates that the pattern is of type search. 55 PatternTypeSearch PatternType = "search" 56 57 reportLoopInterval = 10 * time.Second 58 emptyPattern = "" 59 ) 60 61 // PostingsListCacheOptions is the options struct for the query cache. 62 type PostingsListCacheOptions struct { 63 InstrumentOptions instrument.Options 64 } 65 66 // Validate will return an error if the options are not valid. 67 func (o PostingsListCacheOptions) Validate() error { 68 if o.InstrumentOptions == nil { 69 return errInstrumentOptions 70 } 71 return nil 72 } 73 74 // PostingsListCache implements an LRU for caching queries and their results. 75 type PostingsListCache struct { 76 lru *postingsListLRU 77 78 size int 79 opts PostingsListCacheOptions 80 metrics *postingsListCacheMetrics 81 82 logger *zap.Logger 83 } 84 85 // NewPostingsListCache creates a new query cache. 86 func NewPostingsListCache( 87 size int, 88 opts PostingsListCacheOptions, 89 ) (*PostingsListCache, error) { 90 err := opts.Validate() 91 if err != nil { 92 return nil, err 93 } 94 95 lru, err := newPostingsListLRU(postingsListLRUOptions{ 96 size: size, 97 // Use ~1000 items per shard. 98 shards: int(math.Ceil(float64(size) / 1000)), 99 }) 100 if err != nil { 101 return nil, err 102 } 103 104 plc := &PostingsListCache{ 105 lru: lru, 106 size: size, 107 opts: opts, 108 metrics: newPostingsListCacheMetrics(opts.InstrumentOptions.MetricsScope()), 109 logger: opts.InstrumentOptions.Logger(), 110 } 111 112 return plc, nil 113 } 114 115 // Start the background report loop and return a Closer to cleanup. 116 func (q *PostingsListCache) Start() Closer { 117 return q.startReportLoop() 118 } 119 120 // GetRegexp returns the cached results for the provided regexp query, if any. 121 func (q *PostingsListCache) GetRegexp( 122 segmentUUID uuid.UUID, 123 field string, 124 pattern string, 125 ) (postings.List, bool) { 126 return q.get(segmentUUID, field, pattern, PatternTypeRegexp) 127 } 128 129 // GetTerm returns the cached results for the provided term query, if any. 130 func (q *PostingsListCache) GetTerm( 131 segmentUUID uuid.UUID, 132 field string, 133 pattern string, 134 ) (postings.List, bool) { 135 return q.get(segmentUUID, field, pattern, PatternTypeTerm) 136 } 137 138 // GetField returns the cached results for the provided field query, if any. 139 func (q *PostingsListCache) GetField( 140 segmentUUID uuid.UUID, 141 field string, 142 ) (postings.List, bool) { 143 return q.get(segmentUUID, field, emptyPattern, PatternTypeField) 144 } 145 146 // GetSearch returns the cached results for the provided search query, if any. 147 func (q *PostingsListCache) GetSearch( 148 segmentUUID uuid.UUID, 149 query string, 150 ) (postings.List, bool) { 151 return q.get(segmentUUID, query, emptyPattern, PatternTypeSearch) 152 } 153 154 func (q *PostingsListCache) get( 155 segmentUUID uuid.UUID, 156 field string, 157 pattern string, 158 patternType PatternType, 159 ) (postings.List, bool) { 160 entry, ok := q.lru.Get(segmentUUID, field, pattern, patternType) 161 q.emitCacheGetMetrics(patternType, ok) 162 if !ok { 163 return nil, false 164 } 165 166 return entry.postings, ok 167 } 168 169 type cachedPostings struct { 170 // key 171 segmentUUID uuid.UUID 172 field string 173 pattern string 174 patternType PatternType 175 176 // value 177 postings postings.List 178 // searchQuery is only set for search queries. 179 searchQuery *querypb.Query 180 } 181 182 // PutRegexp updates the LRU with the result of the regexp query. 183 func (q *PostingsListCache) PutRegexp( 184 segmentUUID uuid.UUID, 185 field string, 186 pattern string, 187 pl postings.List, 188 ) { 189 q.put(segmentUUID, field, pattern, PatternTypeRegexp, nil, pl) 190 } 191 192 // PutTerm updates the LRU with the result of the term query. 193 func (q *PostingsListCache) PutTerm( 194 segmentUUID uuid.UUID, 195 field string, 196 pattern string, 197 pl postings.List, 198 ) { 199 q.put(segmentUUID, field, pattern, PatternTypeTerm, nil, pl) 200 } 201 202 // PutField updates the LRU with the result of the field query. 203 func (q *PostingsListCache) PutField( 204 segmentUUID uuid.UUID, 205 field string, 206 pl postings.List, 207 ) { 208 q.put(segmentUUID, field, emptyPattern, PatternTypeField, nil, pl) 209 } 210 211 // PutSearch updates the LRU with the result of a search query. 212 func (q *PostingsListCache) PutSearch( 213 segmentUUID uuid.UUID, 214 queryStr string, 215 query search.Query, 216 pl postings.List, 217 ) { 218 q.put(segmentUUID, queryStr, emptyPattern, PatternTypeSearch, query, pl) 219 } 220 221 func (q *PostingsListCache) put( 222 segmentUUID uuid.UUID, 223 field string, 224 pattern string, 225 patternType PatternType, 226 searchQuery search.Query, 227 pl postings.List, 228 ) { 229 var searchQueryProto *querypb.Query 230 if searchQuery != nil { 231 searchQueryProto = searchQuery.ToProto() 232 } 233 234 value := &cachedPostings{ 235 segmentUUID: segmentUUID, 236 field: field, 237 pattern: pattern, 238 patternType: patternType, 239 searchQuery: searchQueryProto, 240 postings: pl, 241 } 242 q.lru.Add(segmentUUID, field, pattern, patternType, value) 243 244 q.emitCachePutMetrics(patternType) 245 } 246 247 // PurgeSegment removes all postings lists associated with the specified 248 // segment from the cache. 249 func (q *PostingsListCache) PurgeSegment(segmentUUID uuid.UUID) { 250 q.lru.PurgeSegment(segmentUUID) 251 } 252 253 // startReportLoop starts a background process that will call Report() 254 // on a regular basis and returns a function that will end the background 255 // process. 256 func (q *PostingsListCache) startReportLoop() Closer { 257 doneCh := make(chan struct{}) 258 259 go func() { 260 for { 261 select { 262 case <-doneCh: 263 return 264 default: 265 } 266 267 q.Report() 268 time.Sleep(reportLoopInterval) 269 } 270 }() 271 272 return func() { close(doneCh) } 273 } 274 275 // CachedPattern defines a cached pattern. 276 type CachedPattern struct { 277 CacheKey PostingsListCacheKey 278 SearchQuery *querypb.Query 279 Postings postings.List 280 } 281 282 // CachedPatternsResult defines the result of a cached pattern. 283 type CachedPatternsResult struct { 284 InRegistry bool 285 TotalPatterns int 286 MatchedPatterns int 287 } 288 289 // CachedPatternForEachFn defines a function for iterating a cached pattern. 290 type CachedPatternForEachFn func(CachedPattern) 291 292 // CachedPatternsQuery defines a cached pattern query. 293 type CachedPatternsQuery struct { 294 PatternType *PatternType 295 } 296 297 // CachedPatterns returns cached patterns for given query. 298 func (q *PostingsListCache) CachedPatterns( 299 uuid uuid.UUID, 300 query CachedPatternsQuery, 301 fn CachedPatternForEachFn, 302 ) CachedPatternsResult { 303 var result CachedPatternsResult 304 305 for _, shard := range q.lru.shards { 306 shard.RLock() 307 result = shardCachedPatternsWithRLock(uuid, query, fn, shard, result) 308 shard.RUnlock() 309 } 310 311 return result 312 } 313 314 func shardCachedPatternsWithRLock( 315 uuid uuid.UUID, 316 query CachedPatternsQuery, 317 fn CachedPatternForEachFn, 318 shard *postingsListLRUShard, 319 result CachedPatternsResult, 320 ) CachedPatternsResult { 321 segmentPostings, ok := shard.items[uuid.Array()] 322 if !ok { 323 return result 324 } 325 326 result.InRegistry = true 327 result.TotalPatterns += len(segmentPostings) 328 for key, value := range segmentPostings { 329 if v := query.PatternType; v != nil && *v != key.PatternType { 330 continue 331 } 332 333 fn(CachedPattern{ 334 CacheKey: key, 335 SearchQuery: value.Value.(*entry).cachedPostings.searchQuery, 336 Postings: value.Value.(*entry).cachedPostings.postings, 337 }) 338 result.MatchedPatterns++ 339 } 340 341 return result 342 } 343 344 // Report will emit metrics about the status of the cache. 345 func (q *PostingsListCache) Report() { 346 q.metrics.capacity.Update(float64(q.size)) 347 } 348 349 func (q *PostingsListCache) emitCacheGetMetrics(patternType PatternType, hit bool) { 350 var method *postingsListCacheMethodMetrics 351 switch patternType { 352 case PatternTypeRegexp: 353 method = q.metrics.regexp 354 case PatternTypeTerm: 355 method = q.metrics.term 356 case PatternTypeField: 357 method = q.metrics.field 358 case PatternTypeSearch: 359 method = q.metrics.search 360 default: 361 method = q.metrics.unknown // should never happen 362 } 363 if hit { 364 method.hits.Inc(1) 365 } else { 366 method.misses.Inc(1) 367 } 368 } 369 370 func (q *PostingsListCache) emitCachePutMetrics(patternType PatternType) { 371 switch patternType { 372 case PatternTypeRegexp: 373 q.metrics.regexp.puts.Inc(1) 374 case PatternTypeTerm: 375 q.metrics.term.puts.Inc(1) 376 case PatternTypeField: 377 q.metrics.field.puts.Inc(1) 378 case PatternTypeSearch: 379 q.metrics.search.puts.Inc(1) 380 default: 381 q.metrics.unknown.puts.Inc(1) // should never happen 382 } 383 } 384 385 type postingsListCacheMetrics struct { 386 regexp *postingsListCacheMethodMetrics 387 term *postingsListCacheMethodMetrics 388 field *postingsListCacheMethodMetrics 389 search *postingsListCacheMethodMetrics 390 unknown *postingsListCacheMethodMetrics 391 392 size tally.Gauge 393 capacity tally.Gauge 394 395 pooledGet tally.Counter 396 pooledGetErrAddIter tally.Counter 397 pooledPut tally.Counter 398 pooledPutErrNotMutable tally.Counter 399 } 400 401 func newPostingsListCacheMetrics(scope tally.Scope) *postingsListCacheMetrics { 402 return &postingsListCacheMetrics{ 403 regexp: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{ 404 "query_type": "regexp", 405 })), 406 term: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{ 407 "query_type": "term", 408 })), 409 field: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{ 410 "query_type": "field", 411 })), 412 search: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{ 413 "query_type": "search", 414 })), 415 unknown: newPostingsListCacheMethodMetrics(scope.Tagged(map[string]string{ 416 "query_type": "unknown", 417 })), 418 size: scope.Gauge("size"), 419 capacity: scope.Gauge("capacity"), 420 pooledGet: scope.Counter("pooled_get"), 421 pooledGetErrAddIter: scope.Tagged(map[string]string{ 422 "error_type": "add_iter", 423 }).Counter("pooled_get_error"), 424 pooledPut: scope.Counter("pooled_put"), 425 pooledPutErrNotMutable: scope.Tagged(map[string]string{ 426 "error_type": "not_mutable", 427 }).Counter("pooled_put_error"), 428 } 429 } 430 431 type postingsListCacheMethodMetrics struct { 432 hits tally.Counter 433 misses tally.Counter 434 puts tally.Counter 435 } 436 437 func newPostingsListCacheMethodMetrics(scope tally.Scope) *postingsListCacheMethodMetrics { 438 return &postingsListCacheMethodMetrics{ 439 hits: scope.Counter("hits"), 440 misses: scope.Counter("misses"), 441 puts: scope.Counter("puts"), 442 } 443 }