github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/fields_terms_iterator.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package index
    22  
    23  import (
    24  	"errors"
    25  
    26  	pilosaroaring "github.com/m3dbx/pilosa/roaring"
    27  
    28  	"github.com/m3db/m3/src/dbnode/tracepoint"
    29  	"github.com/m3db/m3/src/m3ninx/index/segment"
    30  	"github.com/m3db/m3/src/m3ninx/postings"
    31  	"github.com/m3db/m3/src/m3ninx/postings/roaring"
    32  	"github.com/m3db/m3/src/m3ninx/search"
    33  	"github.com/m3db/m3/src/x/context"
    34  	xerrors "github.com/m3db/m3/src/x/errors"
    35  )
    36  
    37  var errUnpackBitmapFromPostingsList = errors.New("unable to unpack bitmap from postings list")
    38  
    39  // fieldsAndTermsIteratorOpts configures the fieldsAndTermsIterator.
    40  type fieldsAndTermsIteratorOpts struct {
    41  	restrictByQuery *Query
    42  	iterateTerms    bool
    43  	allowFn         allowFn
    44  	fieldIterFn     newFieldIterFn
    45  }
    46  
    47  func (o fieldsAndTermsIteratorOpts) allow(f []byte) bool {
    48  	if o.allowFn == nil {
    49  		return true
    50  	}
    51  	return o.allowFn(f)
    52  }
    53  
    54  func (o fieldsAndTermsIteratorOpts) newFieldIter(r segment.Reader) (segment.FieldsPostingsListIterator, error) {
    55  	if o.fieldIterFn == nil {
    56  		return r.FieldsPostingsList()
    57  	}
    58  	return o.fieldIterFn(r)
    59  }
    60  
    61  type allowFn func(field []byte) bool
    62  
    63  type newFieldIterFn func(r segment.Reader) (segment.FieldsPostingsListIterator, error)
    64  
    65  type fieldsAndTermsIter struct {
    66  	reader segment.Reader
    67  	opts   fieldsAndTermsIteratorOpts
    68  
    69  	err       error
    70  	fieldIter segment.FieldsPostingsListIterator
    71  	termIter  segment.TermsIterator
    72  
    73  	current struct {
    74  		field    []byte
    75  		term     []byte
    76  		postings postings.List
    77  	}
    78  
    79  	restrictByPostings *pilosaroaring.Bitmap
    80  }
    81  
    82  var fieldsAndTermsIterZeroed fieldsAndTermsIter
    83  
    84  var _ fieldsAndTermsIterator = &fieldsAndTermsIter{}
    85  
    86  // newFieldsAndTermsIteratorFn is the lambda definition of the ctor for fieldsAndTermsIterator.
    87  type newFieldsAndTermsIteratorFn func(
    88  	ctx context.Context, r segment.Reader, opts fieldsAndTermsIteratorOpts,
    89  ) (fieldsAndTermsIterator, error)
    90  
    91  func newFieldsAndTermsIterator(
    92  	ctx context.Context,
    93  	reader segment.Reader,
    94  	opts fieldsAndTermsIteratorOpts,
    95  ) (fieldsAndTermsIterator, error) {
    96  	iter := &fieldsAndTermsIter{
    97  		reader: reader,
    98  		opts:   opts,
    99  	}
   100  
   101  	fiter, err := iter.opts.newFieldIter(reader)
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  	iter.fieldIter = fiter
   106  
   107  	if opts.restrictByQuery == nil {
   108  		// No need to restrict results by query.
   109  		return iter, nil
   110  	}
   111  
   112  	// If need to restrict by query, run the query on the segment first.
   113  	searchQuery := opts.restrictByQuery.SearchQuery()
   114  	searcher, err := searchQuery.Searcher()
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  
   119  	var (
   120  		_, sp = ctx.StartTraceSpan(tracepoint.FieldTermsIteratorIndexSearch)
   121  		pl    postings.List
   122  	)
   123  	if readThrough, ok := reader.(search.ReadThroughSegmentSearcher); ok {
   124  		pl, err = readThrough.Search(searchQuery, searcher)
   125  	} else {
   126  		pl, err = searcher.Search(reader)
   127  	}
   128  	sp.Finish()
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  
   133  	// Hold onto the postings bitmap to intersect against on a per term basis.
   134  	bitmap, ok := roaring.BitmapFromPostingsList(pl)
   135  	if !ok {
   136  		return nil, errUnpackBitmapFromPostingsList
   137  	}
   138  
   139  	iter.restrictByPostings = bitmap
   140  	return iter, nil
   141  }
   142  
   143  func (fti *fieldsAndTermsIter) setNextField() bool {
   144  	fieldIter := fti.fieldIter
   145  	if fieldIter == nil {
   146  		return false
   147  	}
   148  
   149  	for fieldIter.Next() {
   150  		field, pl := fieldIter.Current()
   151  		if !fti.opts.allow(field) {
   152  			continue
   153  		}
   154  		if fti.restrictByPostings == nil {
   155  			// No restrictions.
   156  			fti.current.field = field
   157  			return true
   158  		}
   159  
   160  		bitmap, ok := roaring.BitmapFromPostingsList(pl)
   161  		if !ok {
   162  			fti.err = errUnpackBitmapFromPostingsList
   163  			return false
   164  		}
   165  
   166  		// Check field is part of at least some of the documents we're
   167  		// restricted to providing results for based on intersection
   168  		// count.
   169  		// Note: IntersectionCount is significantly faster than intersecting and
   170  		// counting results and also does not allocate.
   171  		if n := fti.restrictByPostings.IntersectionCount(bitmap); n < 1 {
   172  			// No match, not next result.
   173  			continue
   174  		}
   175  
   176  		// Matches, this is next result.
   177  		fti.current.field = field
   178  		return true
   179  	}
   180  
   181  	fti.err = fieldIter.Err()
   182  	return false
   183  }
   184  
   185  func (fti *fieldsAndTermsIter) setNext() bool {
   186  	// check if current field has another term
   187  	if fti.termIter != nil {
   188  		hasNextTerm, err := fti.nextTermsIterResult()
   189  		if err != nil {
   190  			fti.err = err
   191  			return false
   192  		}
   193  		if hasNextTerm {
   194  			return true
   195  		}
   196  	}
   197  
   198  	// i.e. need to switch to next field
   199  	for hasNextField := fti.setNextField(); hasNextField; hasNextField = fti.setNextField() {
   200  		// and get next term for the field
   201  		var err error
   202  		fti.termIter, err = fti.reader.Terms(fti.current.field)
   203  		if err != nil {
   204  			fti.err = err
   205  			return false
   206  		}
   207  
   208  		hasNextTerm, err := fti.nextTermsIterResult()
   209  		if err != nil {
   210  			fti.err = err
   211  			return false
   212  		}
   213  		if hasNextTerm {
   214  			return true
   215  		}
   216  	}
   217  
   218  	// Check field iterator did not encounter error.
   219  	if err := fti.fieldIter.Err(); err != nil {
   220  		fti.err = err
   221  		return false
   222  	}
   223  
   224  	// No more fields.
   225  	return false
   226  }
   227  
   228  func (fti *fieldsAndTermsIter) nextTermsIterResult() (bool, error) {
   229  	for fti.termIter.Next() {
   230  		fti.current.term, fti.current.postings = fti.termIter.Current()
   231  		if fti.restrictByPostings == nil {
   232  			// No restrictions.
   233  			return true, nil
   234  		}
   235  
   236  		bitmap, ok := roaring.BitmapFromPostingsList(fti.current.postings)
   237  		if !ok {
   238  			return false, errUnpackBitmapFromPostingsList
   239  		}
   240  
   241  		// Check term is part of at least some of the documents we're
   242  		// restricted to providing results for based on intersection
   243  		// count.
   244  		// Note: IntersectionCount is significantly faster than intersecting and
   245  		// counting results and also does not allocate.
   246  		if n := fti.restrictByPostings.IntersectionCount(bitmap); n > 0 {
   247  			// Matches, this is next result.
   248  			return true, nil
   249  		}
   250  	}
   251  	if err := fti.termIter.Err(); err != nil {
   252  		return false, err
   253  	}
   254  	if err := fti.termIter.Close(); err != nil {
   255  		return false, err
   256  	}
   257  	// Term iterator no longer relevant, no next.
   258  	fti.termIter = nil
   259  	return false, nil
   260  }
   261  
   262  func (fti *fieldsAndTermsIter) Next() bool {
   263  	if fti.err != nil {
   264  		return false
   265  	}
   266  	// if only need to iterate fields
   267  	if !fti.opts.iterateTerms {
   268  		return fti.setNextField()
   269  	}
   270  	// iterating both fields and terms
   271  	return fti.setNext()
   272  }
   273  
   274  func (fti *fieldsAndTermsIter) Current() (field, term []byte) {
   275  	return fti.current.field, fti.current.term
   276  }
   277  
   278  func (fti *fieldsAndTermsIter) Err() error {
   279  	return fti.err
   280  }
   281  
   282  func (fti *fieldsAndTermsIter) Close() error {
   283  	var multiErr xerrors.MultiError
   284  	if fti.fieldIter != nil {
   285  		multiErr = multiErr.Add(fti.fieldIter.Close())
   286  	}
   287  	if fti.termIter != nil {
   288  		multiErr = multiErr.Add(fti.termIter.Close())
   289  	}
   290  	return multiErr.FinalError()
   291  }