github.com/m3db/m3@v1.5.0/src/m3ninx/index/segment/builder/multi_segments_terms_iter.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package builder
    22  
    23  import (
    24  	"github.com/m3db/m3/src/m3ninx/index/segment"
    25  	"github.com/m3db/m3/src/m3ninx/postings"
    26  	"github.com/m3db/m3/src/m3ninx/postings/roaring"
    27  	xerrors "github.com/m3db/m3/src/x/errors"
    28  	bitmap "github.com/m3dbx/pilosa/roaring"
    29  )
    30  
const (
	// defaultBitmapContainerPooling is the pooling value handed to
	// bitmap.NewBitmapWithDefaultPooling when newTermsIterFromSegments creates
	// the bitmap that backs the iterator's reusable postings list.
	defaultBitmapContainerPooling = 128
)
    34  
// Compile-time assertion that *termsIterFromSegments satisfies the
// segment.TermsIterator interface, since it is the terms iterator we hand
// back for a set of segments.
var _ segment.TermsIterator = &termsIterFromSegments{}
    38  
// termsIterFromSegments iterates the terms of a single field across several
// segments, exposing for each term one combined postings list.
type termsIterFromSegments struct {
	// keyIter is the multi key iterator that the per-segment terms iterators
	// are added to in setField and that drives Next.
	keyIter *multiKeyIterator
	// currPostingsList is the reusable postings list holding the combined
	// postings for the current term; it is reset at the start of every step
	// of Next.
	currPostingsList postings.MutableList
	// bitmapIter is allocated by newTermsIterFromSegments.
	// NOTE(review): not referenced by any method visible in this file —
	// confirm whether it is still needed.
	bitmapIter *bitmap.Iterator

	// segments are the segments set by reset, each paired with its terms
	// iterable.
	segments []segmentTermsMetadata

	// err records the first error hit while building a postings list in Next.
	err error
	// termsIters are reusable containers indexed by segment position; setField
	// grows the slice on demand and clearTermIters zeroes the entries.
	termsIters []*termsKeyIter
}
    49  
// segmentTermsMetadata pairs a segment's metadata with the terms iterable
// obtained from that segment in reset.
type segmentTermsMetadata struct {
	// segment is the metadata for the source segment (its offset, skips and
	// negativeOffsets are read in Next).
	segment segmentMetadata
	// termsIterable is the result of calling TermsIterable() on the segment;
	// setField uses it to open a terms iterator per field.
	termsIterable segment.TermsIterable
}
    54  
    55  func newTermsIterFromSegments() *termsIterFromSegments {
    56  	b := bitmap.NewBitmapWithDefaultPooling(defaultBitmapContainerPooling)
    57  	return &termsIterFromSegments{
    58  		keyIter:          newMultiKeyIterator(),
    59  		currPostingsList: roaring.NewPostingsListFromBitmap(b),
    60  		bitmapIter:       &bitmap.Iterator{},
    61  	}
    62  }
    63  
    64  func (i *termsIterFromSegments) clear() {
    65  	i.segments = nil
    66  	i.clearTermIters()
    67  }
    68  
    69  func (i *termsIterFromSegments) clearTermIters() {
    70  	i.keyIter.reset()
    71  	i.currPostingsList.Reset()
    72  	i.err = nil
    73  	for _, termIter := range i.termsIters {
    74  		termIter.iter = nil
    75  		termIter.segment = segmentMetadata{}
    76  	}
    77  }
    78  
    79  func (i *termsIterFromSegments) reset(segments []segmentMetadata) {
    80  	i.clear()
    81  
    82  	for _, seg := range segments {
    83  		i.segments = append(i.segments, segmentTermsMetadata{
    84  			segment:       seg,
    85  			termsIterable: seg.segment.TermsIterable(),
    86  		})
    87  	}
    88  }
    89  
    90  func (i *termsIterFromSegments) setField(field []byte) error {
    91  	i.clearTermIters()
    92  
    93  	// Alloc any required terms iter containers
    94  	numTermsIterAlloc := len(i.segments) - len(i.termsIters)
    95  	for j := 0; j < numTermsIterAlloc; j++ {
    96  		i.termsIters = append(i.termsIters, &termsKeyIter{})
    97  	}
    98  
    99  	// Add our de-duping multi key value iterator
   100  	i.keyIter.reset()
   101  	for j, seg := range i.segments {
   102  		iter, err := seg.termsIterable.Terms(field)
   103  		if err != nil {
   104  			return err
   105  		}
   106  		if iter.Empty() {
   107  			// Don't consume this iterator if no results
   108  			if err := xerrors.FirstError(iter.Err(), iter.Close()); err != nil {
   109  				return err
   110  			}
   111  			continue
   112  		}
   113  
   114  		tersmKeyIter := i.termsIters[j]
   115  		tersmKeyIter.iter = iter
   116  		tersmKeyIter.segment = seg.segment
   117  		i.keyIter.add(tersmKeyIter)
   118  	}
   119  
   120  	return nil
   121  }
   122  
// Empty reports whether the underlying multi key iterator is empty.
func (i *termsIterFromSegments) Empty() bool {
	return i.keyIter.Empty()
}
   126  
   127  func (i *termsIterFromSegments) Next() bool {
   128  	for {
   129  		if i.err != nil {
   130  			return false
   131  		}
   132  
   133  		if !i.keyIter.Next() {
   134  			return false
   135  		}
   136  
   137  		// Create the overlayed postings list for this term
   138  		i.currPostingsList.Reset()
   139  		for _, iter := range i.keyIter.CurrentIters() {
   140  			termsKeyIter := iter.(*termsKeyIter)
   141  			_, list := termsKeyIter.iter.Current()
   142  
   143  			if termsKeyIter.segment.offset == 0 && termsKeyIter.segment.skips == 0 {
   144  				// No offset, which means is first segment we are combining from
   145  				// so can just direct union.
   146  				if err := i.currPostingsList.UnionInPlace(list); err != nil {
   147  					i.err = err
   148  					return false
   149  				}
   150  				continue
   151  			}
   152  
   153  			// We have to take into account offset and duplicates/skips.
   154  			var (
   155  				iter            = list.Iterator()
   156  				negativeOffsets = termsKeyIter.segment.negativeOffsets
   157  				multiErr        = xerrors.NewMultiError()
   158  			)
   159  			for iter.Next() {
   160  				curr := iter.Current()
   161  				negativeOffset := negativeOffsets[curr]
   162  				// Then skip the individual if matches.
   163  				if negativeOffset == -1 {
   164  					// Skip this value, as itself is a duplicate.
   165  					continue
   166  				}
   167  				value := curr + termsKeyIter.segment.offset - postings.ID(negativeOffset)
   168  				if err := i.currPostingsList.Insert(value); err != nil {
   169  					multiErr = multiErr.Add(err)
   170  					multiErr = multiErr.Add(iter.Close())
   171  					i.err = multiErr.FinalError()
   172  					return false
   173  				}
   174  			}
   175  
   176  			multiErr = multiErr.Add(iter.Err())
   177  			multiErr = multiErr.Add(iter.Close())
   178  			i.err = multiErr.FinalError()
   179  			if i.err != nil {
   180  				return false
   181  			}
   182  		}
   183  
   184  		// Continue looping only if everything skipped or term is empty.
   185  		if !i.currPostingsList.IsEmpty() {
   186  			return true
   187  		}
   188  	}
   189  }
   190  
   191  func (i *termsIterFromSegments) Current() ([]byte, postings.List) {
   192  	return i.keyIter.Current(), i.currPostingsList
   193  }
   194  
   195  func (i *termsIterFromSegments) Err() error {
   196  	if err := i.keyIter.Err(); err != nil {
   197  		return err
   198  	}
   199  	return i.err
   200  }
   201  
   202  func (i *termsIterFromSegments) Close() error {
   203  	err := i.keyIter.Close()
   204  	// Free resources
   205  	i.clearTermIters()
   206  	return err
   207  }
   208  
// Compile-time assertion that *termsKeyIter satisfies keyIterator; it wraps a
// terms iterator so it can be added to the multi key iterator.
var _ keyIterator = &termsKeyIter{}
   211  
// termsKeyIter adapts a segment terms iterator to the keyIterator interface
// and carries the metadata of the segment the iterator came from.
type termsKeyIter struct {
	// iter is the wrapped terms iterator; it is nil after clearTermIters, so
	// the methods below must only be called while a field is set.
	iter segment.TermsIterator
	// segment is the metadata of the segment iter was opened on.
	segment segmentMetadata
}
   216  
// Next advances the wrapped terms iterator and returns its result.
func (i *termsKeyIter) Next() bool {
	return i.iter.Next()
}
   220  
   221  func (i *termsKeyIter) Current() []byte {
   222  	t, _ := i.iter.Current()
   223  	return t
   224  }
   225  
// Err returns the error reported by the wrapped terms iterator.
func (i *termsKeyIter) Err() error {
	return i.iter.Err()
}
   229  
// Close closes the wrapped terms iterator and returns its error.
func (i *termsKeyIter) Close() error {
	return i.iter.Close()
}