github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/read_through_segment.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package index 22 23 import ( 24 "errors" 25 "sync" 26 27 "github.com/m3db/m3/src/m3ninx/doc" 28 "github.com/m3db/m3/src/m3ninx/index" 29 "github.com/m3db/m3/src/m3ninx/index/segment" 30 "github.com/m3db/m3/src/m3ninx/postings" 31 "github.com/m3db/m3/src/m3ninx/search" 32 33 "github.com/pborman/uuid" 34 ) 35 36 var ( 37 errCantGetReaderFromClosedSegment = errors.New("cant get reader from closed segment") 38 errCantCloseClosedSegment = errors.New("cant close closed segment") 39 ) 40 41 // Ensure ReadThroughSegment implements ImmutableSegment so it can be cast upwards 42 // and mmaps can be freed. 
43 var _ segment.ImmutableSegment = (*ReadThroughSegment)(nil) 44 45 // ReadThroughSegment wraps a segment with a postings list cache so that 46 // queries can be transparently cached in a read through manner. In addition, 47 // the postings lists returned by the segments may not be safe to use once the 48 // underlying segments are closed due to the postings lists pointing into the 49 // segments mmap'd region. As a result, the close method of the ReadThroughSegment 50 // will make sure that the cache is purged of all the segments postings lists before 51 // the segment itself is closed. 52 type ReadThroughSegment struct { 53 sync.RWMutex 54 55 segment segment.ImmutableSegment 56 57 uuid uuid.UUID 58 caches ReadThroughSegmentCaches 59 60 opts ReadThroughSegmentOptions 61 62 closed bool 63 } 64 65 // ReadThroughSegmentCaches is the set of caches 66 // to use for the read through segment. 67 type ReadThroughSegmentCaches struct { 68 SegmentPostingsListCache *PostingsListCache 69 SearchPostingsListCache *PostingsListCache 70 } 71 72 // ReadThroughSegmentOptions is the options struct for the 73 // ReadThroughSegment. 74 type ReadThroughSegmentOptions struct { 75 // CacheRegexp sets whether the postings list for regexp queries 76 // should be cached. 77 CacheRegexp bool 78 // CacheTerms sets whether the postings list for term queries 79 // should be cached. 80 CacheTerms bool 81 // CacheSearches sets whether the postings list for search queries 82 // should be cached. 83 CacheSearches bool 84 } 85 86 // NewReadThroughSegment creates a new read through segment. 87 func NewReadThroughSegment( 88 seg segment.ImmutableSegment, 89 caches ReadThroughSegmentCaches, 90 opts ReadThroughSegmentOptions, 91 ) *ReadThroughSegment { 92 return &ReadThroughSegment{ 93 segment: seg, 94 opts: opts, 95 uuid: uuid.NewUUID(), 96 caches: caches, 97 } 98 } 99 100 // Reader returns a read through reader for the read through segment. 
101 func (r *ReadThroughSegment) Reader() (segment.Reader, error) { 102 r.RLock() 103 defer r.RUnlock() 104 if r.closed { 105 return nil, errCantGetReaderFromClosedSegment 106 } 107 108 reader, err := r.segment.Reader() 109 if err != nil { 110 return nil, err 111 } 112 return newReadThroughSegmentReader(r, reader, r.uuid, r.caches, r.opts), nil 113 } 114 115 // Close purges all entries in the cache associated with this segment, 116 // and then closes the underlying segment. 117 func (r *ReadThroughSegment) Close() error { 118 r.Lock() 119 defer r.Unlock() 120 if r.closed { 121 return errCantCloseClosedSegment 122 } 123 124 r.closed = true 125 126 if cache := r.caches.SegmentPostingsListCache; cache != nil { 127 // Purge segments from the cache before closing the segment to avoid 128 // temporarily having postings lists in the cache whose underlying 129 // bytes are no longer mmap'd. 130 cache.PurgeSegment(r.uuid) 131 } 132 if cache := r.caches.SearchPostingsListCache; cache != nil { 133 // Purge segments from the cache before closing the segment to avoid 134 // temporarily having postings lists in the cache whose underlying 135 // bytes are no longer mmap'd. 136 cache.PurgeSegment(r.uuid) 137 } 138 139 return r.segment.Close() 140 } 141 142 // FieldsIterable is a pass through call to the segment, since there's no 143 // postings lists to cache for queries. 144 func (r *ReadThroughSegment) FieldsIterable() segment.FieldsIterable { 145 return r.segment.FieldsIterable() 146 } 147 148 // TermsIterable is a pass through call to the segment, since there's no 149 // postings lists to cache for queries. 150 func (r *ReadThroughSegment) TermsIterable() segment.TermsIterable { 151 return r.segment.TermsIterable() 152 } 153 154 // ContainsID is a pass through call to the segment, since there's no 155 // postings lists to cache for queries. 
156 func (r *ReadThroughSegment) ContainsID(id []byte) (bool, error) { 157 return r.segment.ContainsID(id) 158 } 159 160 // ContainsField is a pass through call to the segment, since there's no 161 // postings lists to cache for queries. 162 func (r *ReadThroughSegment) ContainsField(field []byte) (bool, error) { 163 return r.segment.ContainsField(field) 164 } 165 166 // FreeMmap frees the mmapped data if any. 167 func (r *ReadThroughSegment) FreeMmap() error { 168 return r.segment.FreeMmap() 169 } 170 171 // Size is a pass through call to the segment, since there's no 172 // postings lists to cache for queries. 173 func (r *ReadThroughSegment) Size() int64 { 174 return r.segment.Size() 175 } 176 177 // PutCachedSearchPattern caches a search pattern. 178 func (r *ReadThroughSegment) PutCachedSearchPattern( 179 queryStr string, 180 query search.Query, 181 pl postings.List, 182 ) { 183 r.RLock() 184 defer r.RUnlock() 185 if r.closed { 186 return 187 } 188 189 cache := r.caches.SearchPostingsListCache 190 if cache == nil || !r.opts.CacheSearches { 191 return 192 } 193 194 cache.PutSearch(r.uuid, queryStr, query, pl) 195 } 196 197 // CachedSearchPatternsResult defines cached search patterns. 198 type CachedSearchPatternsResult struct { 199 CacheSearchesDisabled bool 200 CachedPatternsResult CachedPatternsResult 201 } 202 203 // CachedSearchPatterns returns cached search patterns. 
204 func (r *ReadThroughSegment) CachedSearchPatterns( 205 fn CachedPatternForEachFn, 206 ) CachedSearchPatternsResult { 207 cache := r.caches.SearchPostingsListCache 208 if cache == nil || !r.opts.CacheSearches { 209 return CachedSearchPatternsResult{ 210 CacheSearchesDisabled: true, 211 } 212 } 213 214 patternType := PatternTypeSearch 215 result := cache.CachedPatterns(r.uuid, CachedPatternsQuery{ 216 PatternType: &patternType, 217 }, fn) 218 return CachedSearchPatternsResult{ 219 CachedPatternsResult: result, 220 } 221 } 222 223 var _ search.ReadThroughSegmentSearcher = (*readThroughSegmentReader)(nil) 224 225 type readThroughSegmentReader struct { 226 seg *ReadThroughSegment 227 // reader is explicitly not embedded at the top level 228 // of the struct to force new methods added to index.Reader 229 // to be explicitly supported by the read through cache. 230 reader segment.Reader 231 opts ReadThroughSegmentOptions 232 uuid uuid.UUID 233 caches ReadThroughSegmentCaches 234 } 235 236 func newReadThroughSegmentReader( 237 seg *ReadThroughSegment, 238 reader segment.Reader, 239 uuid uuid.UUID, 240 caches ReadThroughSegmentCaches, 241 opts ReadThroughSegmentOptions, 242 ) segment.Reader { 243 return &readThroughSegmentReader{ 244 seg: seg, 245 reader: reader, 246 opts: opts, 247 uuid: uuid, 248 caches: caches, 249 } 250 } 251 252 // MatchRegexp returns a cached posting list or queries the underlying 253 // segment if their is a cache miss. 254 func (s *readThroughSegmentReader) MatchRegexp( 255 field []byte, 256 c index.CompiledRegex, 257 ) (postings.List, error) { 258 cache := s.caches.SegmentPostingsListCache 259 if cache == nil || !s.opts.CacheRegexp { 260 return s.reader.MatchRegexp(field, c) 261 } 262 263 // TODO(rartoul): Would be nice to not allocate strings here. 
264 fieldStr := string(field) 265 patternStr := c.FSTSyntax.String() 266 pl, ok := cache.GetRegexp(s.uuid, fieldStr, patternStr) 267 if ok { 268 return pl, nil 269 } 270 271 pl, err := s.reader.MatchRegexp(field, c) 272 if err == nil { 273 cache.PutRegexp(s.uuid, fieldStr, patternStr, pl) 274 } 275 return pl, err 276 } 277 278 // MatchTerm returns a cached posting list or queries the underlying 279 // segment if their is a cache miss. 280 func (s *readThroughSegmentReader) MatchTerm( 281 field []byte, term []byte, 282 ) (postings.List, error) { 283 cache := s.caches.SegmentPostingsListCache 284 if cache == nil || !s.opts.CacheTerms { 285 return s.reader.MatchTerm(field, term) 286 } 287 288 // TODO(rartoul): Would be nice to not allocate strings here. 289 fieldStr := string(field) 290 patternStr := string(term) 291 pl, ok := cache.GetTerm(s.uuid, fieldStr, patternStr) 292 if ok { 293 return pl, nil 294 } 295 296 pl, err := s.reader.MatchTerm(field, term) 297 if err == nil { 298 cache.PutTerm(s.uuid, fieldStr, patternStr, pl) 299 } 300 return pl, err 301 } 302 303 // MatchField returns a cached posting list or queries the underlying 304 // segment if their is a cache miss. 305 func (s *readThroughSegmentReader) MatchField(field []byte) (postings.List, error) { 306 cache := s.caches.SegmentPostingsListCache 307 if cache == nil || !s.opts.CacheTerms { 308 return s.reader.MatchField(field) 309 } 310 311 // TODO(rartoul): Would be nice to not allocate strings here. 312 fieldStr := string(field) 313 pl, ok := cache.GetField(s.uuid, fieldStr) 314 if ok { 315 return pl, nil 316 } 317 318 pl, err := s.reader.MatchField(field) 319 if err == nil { 320 cache.PutField(s.uuid, fieldStr, pl) 321 } 322 return pl, err 323 } 324 325 // MatchAll is a pass through call, since there's no postings list to cache. 
326 // NB(r): The postings list returned by match all is just an iterator 327 // from zero to the maximum document number indexed by the segment and as such 328 // causes no allocations to compute and construct. 329 func (s *readThroughSegmentReader) MatchAll() (postings.List, error) { 330 return s.reader.MatchAll() 331 } 332 333 // AllDocs is a pass through call, since there's no postings list to cache. 334 func (s *readThroughSegmentReader) AllDocs() (index.IDDocIterator, error) { 335 return s.reader.AllDocs() 336 } 337 338 // Metadata is a pass through call, since there's no postings list to cache. 339 func (s *readThroughSegmentReader) Metadata(id postings.ID) (doc.Metadata, error) { 340 return s.reader.Metadata(id) 341 } 342 343 // MetadataIterator is a pass through call, since there's no postings list to cache. 344 func (s *readThroughSegmentReader) MetadataIterator(pl postings.List) (doc.MetadataIterator, error) { 345 return s.reader.MetadataIterator(pl) 346 } 347 348 // Doc is a pass through call, since there's no postings list to cache. 349 func (s *readThroughSegmentReader) Doc(id postings.ID) (doc.Document, error) { 350 return s.reader.Doc(id) 351 } 352 353 // Docs is a pass through call, since there's no postings list to cache. 354 func (s *readThroughSegmentReader) Docs(pl postings.List) (doc.Iterator, error) { 355 return s.reader.Docs(pl) 356 } 357 358 // Fields is a pass through call. 359 func (s *readThroughSegmentReader) Fields() (segment.FieldsIterator, error) { 360 return s.reader.Fields() 361 } 362 363 // FieldsPostingsList is a pass through call. 364 func (s *readThroughSegmentReader) FieldsPostingsList() (segment.FieldsPostingsListIterator, error) { 365 return s.reader.FieldsPostingsList() 366 } 367 368 // ContainsField is a pass through call. 369 func (s *readThroughSegmentReader) ContainsField(field []byte) (bool, error) { 370 return s.reader.ContainsField(field) 371 } 372 373 // Terms is a pass through call. 
374 func (s *readThroughSegmentReader) Terms(field []byte) (segment.TermsIterator, error) { 375 return s.reader.Terms(field) 376 } 377 378 // Close is a pass through call. 379 func (s *readThroughSegmentReader) Close() error { 380 return s.reader.Close() 381 } 382 383 func (s *readThroughSegmentReader) Search( 384 query search.Query, 385 searcher search.Searcher, 386 ) (postings.List, error) { 387 cache := s.caches.SearchPostingsListCache 388 if cache == nil || !s.opts.CacheSearches { 389 return searcher.Search(s) 390 } 391 392 // TODO(r): Would be nice to not allocate strings here. 393 queryStr := query.String() 394 pl, ok := cache.GetSearch(s.uuid, queryStr) 395 if ok { 396 return pl, nil 397 } 398 399 pl, err := searcher.Search(s) 400 if err != nil { 401 return nil, err 402 } 403 404 cache.PutSearch(s.uuid, queryStr, query, pl) 405 406 return pl, nil 407 }