github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/mem/terms_dict.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package mem
    22  
    23  import (
    24  	re "regexp"
    25  	"sync"
    26  
    27  	"github.com/m3db/m3/src/m3ninx/doc"
    28  	sgmt "github.com/m3db/m3/src/m3ninx/index/segment"
    29  	"github.com/m3db/m3/src/m3ninx/postings"
    30  	"github.com/m3db/m3/src/m3ninx/postings/roaring"
    31  )
    32  
    33  // termsDict is an in-memory terms dictionary. It maps fields to postings lists.
    34  type termsDict struct {
    35  	opts Options
    36  
    37  	currFieldsPostingsLists []postings.List
    38  
    39  	fields struct {
    40  		sync.RWMutex
    41  		*fieldsMap
    42  	}
    43  }
    44  
    45  func newTermsDict(opts Options) termsDictionary {
    46  	dict := &termsDict{
    47  		opts: opts,
    48  	}
    49  	dict.fields.fieldsMap = newFieldsMap(fieldsMapOptions{
    50  		InitialSize: opts.InitialCapacity(),
    51  	})
    52  	return dict
    53  }
    54  
    55  func (d *termsDict) Insert(field doc.Field, id postings.ID) error {
    56  	postingsMap := d.getOrAddName(field.Name)
    57  	return postingsMap.Add(field.Value, id)
    58  }
    59  
    60  func (d *termsDict) ContainsField(field []byte) bool {
    61  	d.fields.RLock()
    62  	defer d.fields.RUnlock()
    63  	_, ok := d.fields.Get(field)
    64  	return ok
    65  }
    66  
    67  func (d *termsDict) ContainsTerm(field, term []byte) bool {
    68  	_, found := d.matchTerm(field, term)
    69  	return found
    70  }
    71  
    72  func (d *termsDict) MatchTerm(field, term []byte) postings.List {
    73  	pl, found := d.matchTerm(field, term)
    74  	if !found {
    75  		return d.opts.PostingsListPool().Get()
    76  	}
    77  	return pl
    78  }
    79  
    80  func (d *termsDict) Fields() sgmt.FieldsIterator {
    81  	d.fields.RLock()
    82  	defer d.fields.RUnlock()
    83  	fields := d.opts.BytesSliceArrayPool().Get()
    84  	for _, entry := range d.fields.Iter() {
    85  		fields = append(fields, entry.Key())
    86  	}
    87  	return newBytesSliceIter(fields, d.opts)
    88  }
    89  
    90  func (d *termsDict) FieldsPostingsList() sgmt.FieldsPostingsListIterator {
    91  	d.fields.RLock()
    92  	defer d.fields.RUnlock()
    93  	// NB(bodu): This is probably fine since the terms dict/mem segment is only used in tests.
    94  	fields := make([]uniqueField, 0, d.fields.Len())
    95  	for _, entry := range d.fields.Iter() {
    96  		d.currFieldsPostingsLists = d.currFieldsPostingsLists[:0]
    97  		field := entry.Key()
    98  		pl := roaring.NewPostingsList()
    99  		if postingsMap, ok := d.fields.Get(field); ok {
   100  			for _, entry := range postingsMap.Iter() {
   101  				d.currFieldsPostingsLists = append(d.currFieldsPostingsLists, entry.value)
   102  			}
   103  		}
   104  		_ = pl.UnionManyInPlace(d.currFieldsPostingsLists)
   105  		fields = append(fields, uniqueField{
   106  			field:        field,
   107  			postingsList: pl,
   108  		})
   109  	}
   110  	return newUniqueFieldsIter(fields, d.opts)
   111  }
   112  
   113  func (d *termsDict) Terms(field []byte) sgmt.TermsIterator {
   114  	d.fields.RLock()
   115  	defer d.fields.RUnlock()
   116  	values, ok := d.fields.Get(field)
   117  	if !ok {
   118  		return sgmt.EmptyTermsIterator
   119  	}
   120  	return values.Keys()
   121  }
   122  
   123  func (d *termsDict) matchTerm(field, term []byte) (postings.List, bool) {
   124  	d.fields.RLock()
   125  	postingsMap, ok := d.fields.Get(field)
   126  	d.fields.RUnlock()
   127  	if !ok {
   128  		return nil, false
   129  	}
   130  	pl, ok := postingsMap.Get(term)
   131  	if !ok {
   132  		return nil, false
   133  	}
   134  	return pl, true
   135  }
   136  
   137  func (d *termsDict) MatchRegexp(
   138  	field []byte,
   139  	compiled *re.Regexp,
   140  ) postings.List {
   141  	d.fields.RLock()
   142  	postingsMap, ok := d.fields.Get(field)
   143  	d.fields.RUnlock()
   144  	if !ok {
   145  		return d.opts.PostingsListPool().Get()
   146  	}
   147  	pl, ok := postingsMap.GetRegex(compiled)
   148  	if !ok {
   149  		return d.opts.PostingsListPool().Get()
   150  	}
   151  	return pl
   152  }
   153  
   154  func (d *termsDict) Reset() {
   155  	d.fields.Lock()
   156  	defer d.fields.Unlock()
   157  
   158  	// TODO(r): We actually want to keep the terms maps around so that they
   159  	// can be reused and avoid reallocation, so instead of deleting them
   160  	// we should just reset each one - however we were seeing some racey
   161  	// issues so now just deleting all entries for now
   162  	d.fields.Reallocate()
   163  }
   164  
   165  func (d *termsDict) getOrAddName(name []byte) *concurrentPostingsMap {
   166  	// Cheap read lock to see if it already exists.
   167  	d.fields.RLock()
   168  	postingsMap, ok := d.fields.Get(name)
   169  	d.fields.RUnlock()
   170  	if ok {
   171  		return postingsMap
   172  	}
   173  
   174  	// Acquire write lock and create.
   175  	d.fields.Lock()
   176  	postingsMap, ok = d.fields.Get(name)
   177  
   178  	// Check if it's been created since we last acquired the lock.
   179  	if ok {
   180  		d.fields.Unlock()
   181  		return postingsMap
   182  	}
   183  
   184  	postingsMap = newConcurrentPostingsMap(d.opts)
   185  	d.fields.SetUnsafe(name, postingsMap, fieldsMapSetUnsafeOptions{
   186  		NoCopyKey:     true,
   187  		NoFinalizeKey: true,
   188  	})
   189  	d.fields.Unlock()
   190  	return postingsMap
   191  }