github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/read_through_segment.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package index
    22  
    23  import (
    24  	"errors"
    25  	"sync"
    26  
    27  	"github.com/m3db/m3/src/m3ninx/doc"
    28  	"github.com/m3db/m3/src/m3ninx/index"
    29  	"github.com/m3db/m3/src/m3ninx/index/segment"
    30  	"github.com/m3db/m3/src/m3ninx/postings"
    31  	"github.com/m3db/m3/src/m3ninx/search"
    32  
    33  	"github.com/pborman/uuid"
    34  )
    35  
var (
	// errCantGetReaderFromClosedSegment is returned by
	// ReadThroughSegment.Reader when the segment has already been closed.
	errCantGetReaderFromClosedSegment = errors.New("cant get reader from closed segment")
	// errCantCloseClosedSegment is returned by ReadThroughSegment.Close
	// when the segment has already been closed.
	errCantCloseClosedSegment         = errors.New("cant close closed segment")
)
    40  
// Ensure ReadThroughSegment implements ImmutableSegment so it can be cast
// upwards and mmaps can be freed.
var _ segment.ImmutableSegment = (*ReadThroughSegment)(nil)
    44  
// ReadThroughSegment wraps a segment with a postings list cache so that
// queries can be transparently cached in a read through manner. In addition,
// the postings lists returned by the segments may not be safe to use once the
// underlying segments are closed due to the postings lists pointing into the
// segments mmap'd region. As a result, the close method of the ReadThroughSegment
// will make sure that the cache is purged of all the segments postings lists before
// the segment itself is closed.
type ReadThroughSegment struct {
	// RWMutex guards closed: Reader and PutCachedSearchPattern hold the read
	// lock, Close holds the write lock.
	sync.RWMutex

	// segment is the wrapped segment that every call is delegated to.
	segment segment.ImmutableSegment

	// uuid is generated when the segment is constructed and is passed to the
	// caches on every get, put and purge performed for this segment.
	uuid   uuid.UUID
	// caches holds the postings list caches; either entry may be nil, in
	// which case the corresponding caching is skipped.
	caches ReadThroughSegmentCaches

	// opts selects which kinds of queries are cached.
	opts ReadThroughSegmentOptions

	// closed is set by Close. Once true, Reader and Close return errors and
	// PutCachedSearchPattern becomes a no-op.
	closed bool
}
    64  
// ReadThroughSegmentCaches is the set of caches
// to use for the read through segment. Either cache may be nil, which
// disables the caching that would otherwise use it.
type ReadThroughSegmentCaches struct {
	// SegmentPostingsListCache backs the regexp, term and field lookups
	// made through a reader (MatchRegexp, MatchTerm, MatchField).
	SegmentPostingsListCache *PostingsListCache
	// SearchPostingsListCache backs whole-query results (Search,
	// PutCachedSearchPattern, CachedSearchPatterns).
	SearchPostingsListCache  *PostingsListCache
}
    71  
// ReadThroughSegmentOptions is the options struct for the
// ReadThroughSegment.
type ReadThroughSegmentOptions struct {
	// CacheRegexp sets whether the postings list for regexp queries
	// should be cached. It gates caching in MatchRegexp.
	CacheRegexp bool
	// CacheTerms sets whether the postings list for term queries
	// should be cached. It gates caching in both MatchTerm and MatchField.
	CacheTerms bool
	// CacheSearches sets whether the postings list for search queries
	// should be cached. It gates Search, PutCachedSearchPattern and
	// CachedSearchPatterns.
	CacheSearches bool
}
    85  
    86  // NewReadThroughSegment creates a new read through segment.
    87  func NewReadThroughSegment(
    88  	seg segment.ImmutableSegment,
    89  	caches ReadThroughSegmentCaches,
    90  	opts ReadThroughSegmentOptions,
    91  ) *ReadThroughSegment {
    92  	return &ReadThroughSegment{
    93  		segment: seg,
    94  		opts:    opts,
    95  		uuid:    uuid.NewUUID(),
    96  		caches:  caches,
    97  	}
    98  }
    99  
   100  // Reader returns a read through reader for the read through segment.
   101  func (r *ReadThroughSegment) Reader() (segment.Reader, error) {
   102  	r.RLock()
   103  	defer r.RUnlock()
   104  	if r.closed {
   105  		return nil, errCantGetReaderFromClosedSegment
   106  	}
   107  
   108  	reader, err := r.segment.Reader()
   109  	if err != nil {
   110  		return nil, err
   111  	}
   112  	return newReadThroughSegmentReader(r, reader, r.uuid, r.caches, r.opts), nil
   113  }
   114  
   115  // Close purges all entries in the cache associated with this segment,
   116  // and then closes the underlying segment.
   117  func (r *ReadThroughSegment) Close() error {
   118  	r.Lock()
   119  	defer r.Unlock()
   120  	if r.closed {
   121  		return errCantCloseClosedSegment
   122  	}
   123  
   124  	r.closed = true
   125  
   126  	if cache := r.caches.SegmentPostingsListCache; cache != nil {
   127  		// Purge segments from the cache before closing the segment to avoid
   128  		// temporarily having postings lists in the cache whose underlying
   129  		// bytes are no longer mmap'd.
   130  		cache.PurgeSegment(r.uuid)
   131  	}
   132  	if cache := r.caches.SearchPostingsListCache; cache != nil {
   133  		// Purge segments from the cache before closing the segment to avoid
   134  		// temporarily having postings lists in the cache whose underlying
   135  		// bytes are no longer mmap'd.
   136  		cache.PurgeSegment(r.uuid)
   137  	}
   138  
   139  	return r.segment.Close()
   140  }
   141  
// FieldsIterable is a pass through call to the segment, since there's no
// postings lists to cache for queries. It does not take the segment lock or
// check the closed flag.
func (r *ReadThroughSegment) FieldsIterable() segment.FieldsIterable {
	return r.segment.FieldsIterable()
}
   147  
// TermsIterable is a pass through call to the segment, since there's no
// postings lists to cache for queries. It does not take the segment lock or
// check the closed flag.
func (r *ReadThroughSegment) TermsIterable() segment.TermsIterable {
	return r.segment.TermsIterable()
}
   153  
// ContainsID is a pass through call to the segment, since there's no
// postings lists to cache for queries. The result and error come straight
// from the wrapped segment's ContainsID.
func (r *ReadThroughSegment) ContainsID(id []byte) (bool, error) {
	return r.segment.ContainsID(id)
}
   159  
// ContainsField is a pass through call to the segment, since there's no
// postings lists to cache for queries. The result and error come straight
// from the wrapped segment's ContainsField.
func (r *ReadThroughSegment) ContainsField(field []byte) (bool, error) {
	return r.segment.ContainsField(field)
}
   165  
// FreeMmap frees the mmapped data if any, by delegating to the wrapped
// segment's FreeMmap. The caches are not touched by this call.
func (r *ReadThroughSegment) FreeMmap() error {
	return r.segment.FreeMmap()
}
   170  
// Size is a pass through call to the segment, since there's no
// postings lists to cache for queries. It returns whatever the wrapped
// segment's Size reports.
func (r *ReadThroughSegment) Size() int64 {
	return r.segment.Size()
}
   176  
   177  // PutCachedSearchPattern caches a search pattern.
   178  func (r *ReadThroughSegment) PutCachedSearchPattern(
   179  	queryStr string,
   180  	query search.Query,
   181  	pl postings.List,
   182  ) {
   183  	r.RLock()
   184  	defer r.RUnlock()
   185  	if r.closed {
   186  		return
   187  	}
   188  
   189  	cache := r.caches.SearchPostingsListCache
   190  	if cache == nil || !r.opts.CacheSearches {
   191  		return
   192  	}
   193  
   194  	cache.PutSearch(r.uuid, queryStr, query, pl)
   195  }
   196  
// CachedSearchPatternsResult defines cached search patterns.
type CachedSearchPatternsResult struct {
	// CacheSearchesDisabled is set to true by CachedSearchPatterns when no
	// search cache is configured or search caching is turned off.
	CacheSearchesDisabled bool
	// CachedPatternsResult is the value returned by the search cache's
	// CachedPatterns call; it is left at its zero value when caching is
	// disabled.
	CachedPatternsResult  CachedPatternsResult
}
   202  
   203  // CachedSearchPatterns returns cached search patterns.
   204  func (r *ReadThroughSegment) CachedSearchPatterns(
   205  	fn CachedPatternForEachFn,
   206  ) CachedSearchPatternsResult {
   207  	cache := r.caches.SearchPostingsListCache
   208  	if cache == nil || !r.opts.CacheSearches {
   209  		return CachedSearchPatternsResult{
   210  			CacheSearchesDisabled: true,
   211  		}
   212  	}
   213  
   214  	patternType := PatternTypeSearch
   215  	result := cache.CachedPatterns(r.uuid, CachedPatternsQuery{
   216  		PatternType: &patternType,
   217  	}, fn)
   218  	return CachedSearchPatternsResult{
   219  		CachedPatternsResult: result,
   220  	}
   221  }
   222  
// Compile-time check that readThroughSegmentReader implements
// search.ReadThroughSegmentSearcher.
var _ search.ReadThroughSegmentSearcher = (*readThroughSegmentReader)(nil)
   224  
// readThroughSegmentReader wraps a segment.Reader and consults the postings
// list caches before delegating regexp, term, field and search lookups to it.
type readThroughSegmentReader struct {
	// seg is the ReadThroughSegment handed to newReadThroughSegmentReader.
	// NOTE(review): no method visible in this file reads it.
	seg *ReadThroughSegment
	// reader is explicitly not embedded at the top level
	// of the struct to force new methods added to index.Reader
	// to be explicitly supported by the read through cache.
	reader segment.Reader
	// opts selects which lookups are cached.
	opts   ReadThroughSegmentOptions
	// uuid identifies the segment in every cache get and put.
	uuid   uuid.UUID
	// caches holds the postings list caches; either entry may be nil.
	caches ReadThroughSegmentCaches
}
   235  
   236  func newReadThroughSegmentReader(
   237  	seg *ReadThroughSegment,
   238  	reader segment.Reader,
   239  	uuid uuid.UUID,
   240  	caches ReadThroughSegmentCaches,
   241  	opts ReadThroughSegmentOptions,
   242  ) segment.Reader {
   243  	return &readThroughSegmentReader{
   244  		seg:    seg,
   245  		reader: reader,
   246  		opts:   opts,
   247  		uuid:   uuid,
   248  		caches: caches,
   249  	}
   250  }
   251  
   252  // MatchRegexp returns a cached posting list or queries the underlying
   253  // segment if their is a cache miss.
   254  func (s *readThroughSegmentReader) MatchRegexp(
   255  	field []byte,
   256  	c index.CompiledRegex,
   257  ) (postings.List, error) {
   258  	cache := s.caches.SegmentPostingsListCache
   259  	if cache == nil || !s.opts.CacheRegexp {
   260  		return s.reader.MatchRegexp(field, c)
   261  	}
   262  
   263  	// TODO(rartoul): Would be nice to not allocate strings here.
   264  	fieldStr := string(field)
   265  	patternStr := c.FSTSyntax.String()
   266  	pl, ok := cache.GetRegexp(s.uuid, fieldStr, patternStr)
   267  	if ok {
   268  		return pl, nil
   269  	}
   270  
   271  	pl, err := s.reader.MatchRegexp(field, c)
   272  	if err == nil {
   273  		cache.PutRegexp(s.uuid, fieldStr, patternStr, pl)
   274  	}
   275  	return pl, err
   276  }
   277  
   278  // MatchTerm returns a cached posting list or queries the underlying
   279  // segment if their is a cache miss.
   280  func (s *readThroughSegmentReader) MatchTerm(
   281  	field []byte, term []byte,
   282  ) (postings.List, error) {
   283  	cache := s.caches.SegmentPostingsListCache
   284  	if cache == nil || !s.opts.CacheTerms {
   285  		return s.reader.MatchTerm(field, term)
   286  	}
   287  
   288  	// TODO(rartoul): Would be nice to not allocate strings here.
   289  	fieldStr := string(field)
   290  	patternStr := string(term)
   291  	pl, ok := cache.GetTerm(s.uuid, fieldStr, patternStr)
   292  	if ok {
   293  		return pl, nil
   294  	}
   295  
   296  	pl, err := s.reader.MatchTerm(field, term)
   297  	if err == nil {
   298  		cache.PutTerm(s.uuid, fieldStr, patternStr, pl)
   299  	}
   300  	return pl, err
   301  }
   302  
   303  // MatchField returns a cached posting list or queries the underlying
   304  // segment if their is a cache miss.
   305  func (s *readThroughSegmentReader) MatchField(field []byte) (postings.List, error) {
   306  	cache := s.caches.SegmentPostingsListCache
   307  	if cache == nil || !s.opts.CacheTerms {
   308  		return s.reader.MatchField(field)
   309  	}
   310  
   311  	// TODO(rartoul): Would be nice to not allocate strings here.
   312  	fieldStr := string(field)
   313  	pl, ok := cache.GetField(s.uuid, fieldStr)
   314  	if ok {
   315  		return pl, nil
   316  	}
   317  
   318  	pl, err := s.reader.MatchField(field)
   319  	if err == nil {
   320  		cache.PutField(s.uuid, fieldStr, pl)
   321  	}
   322  	return pl, err
   323  }
   324  
// MatchAll is a pass through call, since there's no postings list to cache.
// It returns the wrapped reader's MatchAll result unchanged.
// NB(r): The postings list returned by match all is just an iterator
// from zero to the maximum document number indexed by the segment and as such
// causes no allocations to compute and construct.
func (s *readThroughSegmentReader) MatchAll() (postings.List, error) {
	return s.reader.MatchAll()
}
   332  
// AllDocs is a pass through call, since there's no postings list to cache.
// It returns the wrapped reader's AllDocs result unchanged.
func (s *readThroughSegmentReader) AllDocs() (index.IDDocIterator, error) {
	return s.reader.AllDocs()
}
   337  
// Metadata is a pass through call, since there's no postings list to cache.
// It returns the wrapped reader's Metadata result for id unchanged.
func (s *readThroughSegmentReader) Metadata(id postings.ID) (doc.Metadata, error) {
	return s.reader.Metadata(id)
}
   342  
// MetadataIterator is a pass through call, since there's no postings list to cache.
// It returns the wrapped reader's MetadataIterator result for pl unchanged.
func (s *readThroughSegmentReader) MetadataIterator(pl postings.List) (doc.MetadataIterator, error) {
	return s.reader.MetadataIterator(pl)
}
   347  
// Doc is a pass through call, since there's no postings list to cache.
// It returns the wrapped reader's Doc result for id unchanged.
func (s *readThroughSegmentReader) Doc(id postings.ID) (doc.Document, error) {
	return s.reader.Doc(id)
}
   352  
// Docs is a pass through call, since there's no postings list to cache.
// It returns the wrapped reader's Docs result for pl unchanged.
func (s *readThroughSegmentReader) Docs(pl postings.List) (doc.Iterator, error) {
	return s.reader.Docs(pl)
}
   357  
// Fields is a pass through call. It returns the wrapped reader's Fields
// result unchanged; no cache is consulted.
func (s *readThroughSegmentReader) Fields() (segment.FieldsIterator, error) {
	return s.reader.Fields()
}
   362  
// FieldsPostingsList is a pass through call. It returns the wrapped reader's
// FieldsPostingsList result unchanged; no cache is consulted.
func (s *readThroughSegmentReader) FieldsPostingsList() (segment.FieldsPostingsListIterator, error) {
	return s.reader.FieldsPostingsList()
}
   367  
// ContainsField is a pass through call. It returns the wrapped reader's
// ContainsField result for field unchanged; no cache is consulted.
func (s *readThroughSegmentReader) ContainsField(field []byte) (bool, error) {
	return s.reader.ContainsField(field)
}
   372  
// Terms is a pass through call. It returns the wrapped reader's Terms
// result for field unchanged; no cache is consulted.
func (s *readThroughSegmentReader) Terms(field []byte) (segment.TermsIterator, error) {
	return s.reader.Terms(field)
}
   377  
// Close is a pass through call. It closes only the wrapped reader; the
// owning segment and the caches are not touched.
func (s *readThroughSegmentReader) Close() error {
	return s.reader.Close()
}
   382  
   383  func (s *readThroughSegmentReader) Search(
   384  	query search.Query,
   385  	searcher search.Searcher,
   386  ) (postings.List, error) {
   387  	cache := s.caches.SearchPostingsListCache
   388  	if cache == nil || !s.opts.CacheSearches {
   389  		return searcher.Search(s)
   390  	}
   391  
   392  	// TODO(r): Would be nice to not allocate strings here.
   393  	queryStr := query.String()
   394  	pl, ok := cache.GetSearch(s.uuid, queryStr)
   395  	if ok {
   396  		return pl, nil
   397  	}
   398  
   399  	pl, err := searcher.Search(s)
   400  	if err != nil {
   401  		return nil, err
   402  	}
   403  
   404  	cache.PutSearch(s.uuid, queryStr, query, pl)
   405  
   406  	return pl, nil
   407  }