github.com/m3db/m3@v1.5.0/src/m3ninx/index/segment/builder/terms.go (about)

     1  // Copyright (c) 2020 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package builder
    22  
    23  import (
    24  	"bytes"
    25  
    26  	"github.com/m3db/m3/src/m3ninx/postings"
    27  	"github.com/twotwotwo/sorts"
    28  )
    29  
    30  type terms struct {
    31  	opts                Options
    32  	pool                postings.Pool
    33  	postings            *PostingsMap
    34  	postingsListUnion   postings.MutableList
    35  	uniqueTerms         []termElem
    36  	uniqueTermsIsSorted bool
    37  }
    38  
    39  type termElem struct {
    40  	term     []byte
    41  	postings postings.List
    42  }
    43  
    44  func newTerms(opts Options) *terms {
    45  	pool := opts.PostingsListPool()
    46  	return &terms{
    47  		opts:              opts,
    48  		pool:              pool,
    49  		postingsListUnion: pool.Get(),
    50  		postings:          NewPostingsMap(PostingsMapOptions{}),
    51  	}
    52  }
    53  
    54  func (t *terms) size() int {
    55  	return len(t.uniqueTerms)
    56  }
    57  
    58  func (t *terms) post(term []byte, id postings.ID) error {
    59  	postingsList, ok := t.postings.Get(term)
    60  	if !ok {
    61  		postingsList = t.pool.Get()
    62  		t.postings.SetUnsafe(term, postingsList, PostingsMapSetUnsafeOptions{
    63  			NoCopyKey:     true,
    64  			NoFinalizeKey: true,
    65  		})
    66  
    67  	}
    68  
    69  	// If empty posting list, track insertion of this key into the terms
    70  	// collection for correct response when retrieving all terms
    71  	newTerm := postingsList.Len() == 0
    72  	if err := postingsList.Insert(id); err != nil {
    73  		return err
    74  	}
    75  	if err := t.postingsListUnion.Insert(id); err != nil {
    76  		return err
    77  	}
    78  	if newTerm {
    79  		t.uniqueTerms = append(t.uniqueTerms, termElem{
    80  			term:     term,
    81  			postings: postingsList,
    82  		})
    83  		t.uniqueTermsIsSorted = false
    84  	}
    85  	return nil
    86  }
    87  
    88  // nolint: unused
    89  func (t *terms) get(term []byte) (postings.List, bool) {
    90  	value, ok := t.postings.Get(term)
    91  	return value, ok
    92  }
    93  
    94  func (t *terms) sortIfRequired() {
    95  	if t.uniqueTermsIsSorted {
    96  		return
    97  	}
    98  
    99  	// NB(r): See SetSortConcurrency why this RLock is required.
   100  	sortConcurrencyLock.RLock()
   101  	sorts.ByBytes(t)
   102  	sortConcurrencyLock.RUnlock()
   103  
   104  	t.uniqueTermsIsSorted = true
   105  }
   106  
   107  func (t *terms) reset() {
   108  	// Keep postings map lookup, return postings lists to pool
   109  	for _, entry := range t.postings.Iter() {
   110  		t.pool.Put(entry.Value())
   111  	}
   112  	t.postings.Reset()
   113  	t.postingsListUnion.Reset()
   114  
   115  	// Reset the unique terms slice
   116  	var emptyTerm termElem
   117  	for i := range t.uniqueTerms {
   118  		t.uniqueTerms[i] = emptyTerm
   119  	}
   120  	t.uniqueTerms = t.uniqueTerms[:0]
   121  	t.uniqueTermsIsSorted = false
   122  }
   123  
   124  func (t *terms) Len() int {
   125  	return len(t.uniqueTerms)
   126  }
   127  
   128  func (t *terms) Less(i, j int) bool {
   129  	return bytes.Compare(t.uniqueTerms[i].term, t.uniqueTerms[j].term) < 0
   130  }
   131  
   132  func (t *terms) Swap(i, j int) {
   133  	t.uniqueTerms[i], t.uniqueTerms[j] = t.uniqueTerms[j], t.uniqueTerms[i]
   134  }
   135  
   136  func (t *terms) Key(i int) []byte {
   137  	return t.uniqueTerms[i].term
   138  }