github.com/m3db/m3@v1.5.0/src/m3ninx/index/segment/mem/terms_dict.go (about) 1 // Copyright (c) 2020 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package mem 22 23 import ( 24 re "regexp" 25 "sync" 26 27 "github.com/m3db/m3/src/m3ninx/doc" 28 sgmt "github.com/m3db/m3/src/m3ninx/index/segment" 29 "github.com/m3db/m3/src/m3ninx/postings" 30 "github.com/m3db/m3/src/m3ninx/postings/roaring" 31 ) 32 33 // termsDict is an in-memory terms dictionary. It maps fields to postings lists. 34 type termsDict struct { 35 opts Options 36 37 currFieldsPostingsLists []postings.List 38 39 fields struct { 40 sync.RWMutex 41 *fieldsMap 42 } 43 } 44 45 func newTermsDict(opts Options) termsDictionary { 46 dict := &termsDict{ 47 opts: opts, 48 } 49 dict.fields.fieldsMap = newFieldsMap(fieldsMapOptions{ 50 InitialSize: opts.InitialCapacity(), 51 }) 52 return dict 53 } 54 55 func (d *termsDict) Insert(field doc.Field, id postings.ID) error { 56 postingsMap := d.getOrAddName(field.Name) 57 return postingsMap.Add(field.Value, id) 58 } 59 60 func (d *termsDict) ContainsField(field []byte) bool { 61 d.fields.RLock() 62 defer d.fields.RUnlock() 63 _, ok := d.fields.Get(field) 64 return ok 65 } 66 67 func (d *termsDict) ContainsTerm(field, term []byte) bool { 68 _, found := d.matchTerm(field, term) 69 return found 70 } 71 72 func (d *termsDict) MatchTerm(field, term []byte) postings.List { 73 pl, found := d.matchTerm(field, term) 74 if !found { 75 return d.opts.PostingsListPool().Get() 76 } 77 return pl 78 } 79 80 func (d *termsDict) Fields() sgmt.FieldsIterator { 81 d.fields.RLock() 82 defer d.fields.RUnlock() 83 fields := d.opts.BytesSliceArrayPool().Get() 84 for _, entry := range d.fields.Iter() { 85 fields = append(fields, entry.Key()) 86 } 87 return newBytesSliceIter(fields, d.opts) 88 } 89 90 func (d *termsDict) FieldsPostingsList() sgmt.FieldsPostingsListIterator { 91 d.fields.RLock() 92 defer d.fields.RUnlock() 93 // NB(bodu): This is probably fine since the terms dict/mem segment is only used in tests. 94 fields := make([]uniqueField, 0, d.fields.Len()) 95 for _, entry := range d.fields.Iter() { 96 d.currFieldsPostingsLists = d.currFieldsPostingsLists[:0] 97 field := entry.Key() 98 pl := roaring.NewPostingsList() 99 if postingsMap, ok := d.fields.Get(field); ok { 100 for _, entry := range postingsMap.Iter() { 101 d.currFieldsPostingsLists = append(d.currFieldsPostingsLists, entry.value) 102 } 103 } 104 _ = pl.UnionManyInPlace(d.currFieldsPostingsLists) 105 fields = append(fields, uniqueField{ 106 field: field, 107 postingsList: pl, 108 }) 109 } 110 return newUniqueFieldsIter(fields, d.opts) 111 } 112 113 func (d *termsDict) Terms(field []byte) sgmt.TermsIterator { 114 d.fields.RLock() 115 defer d.fields.RUnlock() 116 values, ok := d.fields.Get(field) 117 if !ok { 118 return sgmt.EmptyTermsIterator 119 } 120 return values.Keys() 121 } 122 123 func (d *termsDict) matchTerm(field, term []byte) (postings.List, bool) { 124 d.fields.RLock() 125 postingsMap, ok := d.fields.Get(field) 126 d.fields.RUnlock() 127 if !ok { 128 return nil, false 129 } 130 pl, ok := postingsMap.Get(term) 131 if !ok { 132 return nil, false 133 } 134 return pl, true 135 } 136 137 func (d *termsDict) MatchRegexp( 138 field []byte, 139 compiled *re.Regexp, 140 ) postings.List { 141 d.fields.RLock() 142 postingsMap, ok := d.fields.Get(field) 143 d.fields.RUnlock() 144 if !ok { 145 return d.opts.PostingsListPool().Get() 146 } 147 pl, ok := postingsMap.GetRegex(compiled) 148 if !ok { 149 return d.opts.PostingsListPool().Get() 150 } 151 return pl 152 } 153 154 func (d *termsDict) Reset() { 155 d.fields.Lock() 156 defer d.fields.Unlock() 157 158 // TODO(r): We actually want to keep the terms maps around so that they 159 // can be reused and avoid reallocation, so instead of deleting them 160 // we should just reset each one - however we were seeing some racey 161 // issues so now just deleting all entries for now 162 d.fields.Reallocate() 163 } 164 165 func (d *termsDict) getOrAddName(name []byte) *concurrentPostingsMap { 166 // Cheap read lock to see if it already exists. 167 d.fields.RLock() 168 postingsMap, ok := d.fields.Get(name) 169 d.fields.RUnlock() 170 if ok { 171 return postingsMap 172 } 173 174 // Acquire write lock and create. 175 d.fields.Lock() 176 postingsMap, ok = d.fields.Get(name) 177 178 // Check if it's been created since we last acquired the lock. 179 if ok { 180 d.fields.Unlock() 181 return postingsMap 182 } 183 184 postingsMap = newConcurrentPostingsMap(d.opts) 185 d.fields.SetUnsafe(name, postingsMap, fieldsMapSetUnsafeOptions{ 186 NoCopyKey: true, 187 NoFinalizeKey: true, 188 }) 189 d.fields.Unlock() 190 return postingsMap 191 }