github.com/m3db/m3@v1.5.0/src/m3ninx/index/segment/builder/multi_segments_terms_iter.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package builder 22 23 import ( 24 "github.com/m3db/m3/src/m3ninx/index/segment" 25 "github.com/m3db/m3/src/m3ninx/postings" 26 "github.com/m3db/m3/src/m3ninx/postings/roaring" 27 xerrors "github.com/m3db/m3/src/x/errors" 28 bitmap "github.com/m3dbx/pilosa/roaring" 29 ) 30 31 const ( 32 defaultBitmapContainerPooling = 128 33 ) 34 35 // Ensure for our use case that the terms iter from segments we return 36 // matches the signature for the terms iterator. 37 var _ segment.TermsIterator = &termsIterFromSegments{} 38 39 type termsIterFromSegments struct { 40 keyIter *multiKeyIterator 41 currPostingsList postings.MutableList 42 bitmapIter *bitmap.Iterator 43 44 segments []segmentTermsMetadata 45 46 err error 47 termsIters []*termsKeyIter 48 } 49 50 type segmentTermsMetadata struct { 51 segment segmentMetadata 52 termsIterable segment.TermsIterable 53 } 54 55 func newTermsIterFromSegments() *termsIterFromSegments { 56 b := bitmap.NewBitmapWithDefaultPooling(defaultBitmapContainerPooling) 57 return &termsIterFromSegments{ 58 keyIter: newMultiKeyIterator(), 59 currPostingsList: roaring.NewPostingsListFromBitmap(b), 60 bitmapIter: &bitmap.Iterator{}, 61 } 62 } 63 64 func (i *termsIterFromSegments) clear() { 65 i.segments = nil 66 i.clearTermIters() 67 } 68 69 func (i *termsIterFromSegments) clearTermIters() { 70 i.keyIter.reset() 71 i.currPostingsList.Reset() 72 i.err = nil 73 for _, termIter := range i.termsIters { 74 termIter.iter = nil 75 termIter.segment = segmentMetadata{} 76 } 77 } 78 79 func (i *termsIterFromSegments) reset(segments []segmentMetadata) { 80 i.clear() 81 82 for _, seg := range segments { 83 i.segments = append(i.segments, segmentTermsMetadata{ 84 segment: seg, 85 termsIterable: seg.segment.TermsIterable(), 86 }) 87 } 88 } 89 90 func (i *termsIterFromSegments) setField(field []byte) error { 91 i.clearTermIters() 92 93 // Alloc any required terms iter containers 94 numTermsIterAlloc := len(i.segments) - len(i.termsIters) 95 for j := 0; j < numTermsIterAlloc; j++ { 96 i.termsIters = append(i.termsIters, &termsKeyIter{}) 97 } 98 99 // Add our de-duping multi key value iterator 100 i.keyIter.reset() 101 for j, seg := range i.segments { 102 iter, err := seg.termsIterable.Terms(field) 103 if err != nil { 104 return err 105 } 106 if iter.Empty() { 107 // Don't consume this iterator if no results 108 if err := xerrors.FirstError(iter.Err(), iter.Close()); err != nil { 109 return err 110 } 111 continue 112 } 113 114 tersmKeyIter := i.termsIters[j] 115 tersmKeyIter.iter = iter 116 tersmKeyIter.segment = seg.segment 117 i.keyIter.add(tersmKeyIter) 118 } 119 120 return nil 121 } 122 123 func (i *termsIterFromSegments) Empty() bool { 124 return i.keyIter.Empty() 125 } 126 127 func (i *termsIterFromSegments) Next() bool { 128 for { 129 if i.err != nil { 130 return false 131 } 132 133 if !i.keyIter.Next() { 134 return false 135 } 136 137 // Create the overlayed postings list for this term 138 i.currPostingsList.Reset() 139 for _, iter := range i.keyIter.CurrentIters() { 140 termsKeyIter := iter.(*termsKeyIter) 141 _, list := termsKeyIter.iter.Current() 142 143 if termsKeyIter.segment.offset == 0 && termsKeyIter.segment.skips == 0 { 144 // No offset, which means is first segment we are combining from 145 // so can just direct union. 146 if err := i.currPostingsList.UnionInPlace(list); err != nil { 147 i.err = err 148 return false 149 } 150 continue 151 } 152 153 // We have to take into account offset and duplicates/skips. 154 var ( 155 iter = list.Iterator() 156 negativeOffsets = termsKeyIter.segment.negativeOffsets 157 multiErr = xerrors.NewMultiError() 158 ) 159 for iter.Next() { 160 curr := iter.Current() 161 negativeOffset := negativeOffsets[curr] 162 // Then skip the individual if matches. 163 if negativeOffset == -1 { 164 // Skip this value, as itself is a duplicate. 165 continue 166 } 167 value := curr + termsKeyIter.segment.offset - postings.ID(negativeOffset) 168 if err := i.currPostingsList.Insert(value); err != nil { 169 multiErr = multiErr.Add(err) 170 multiErr = multiErr.Add(iter.Close()) 171 i.err = multiErr.FinalError() 172 return false 173 } 174 } 175 176 multiErr = multiErr.Add(iter.Err()) 177 multiErr = multiErr.Add(iter.Close()) 178 i.err = multiErr.FinalError() 179 if i.err != nil { 180 return false 181 } 182 } 183 184 // Continue looping only if everything skipped or term is empty. 185 if !i.currPostingsList.IsEmpty() { 186 return true 187 } 188 } 189 } 190 191 func (i *termsIterFromSegments) Current() ([]byte, postings.List) { 192 return i.keyIter.Current(), i.currPostingsList 193 } 194 195 func (i *termsIterFromSegments) Err() error { 196 if err := i.keyIter.Err(); err != nil { 197 return err 198 } 199 return i.err 200 } 201 202 func (i *termsIterFromSegments) Close() error { 203 err := i.keyIter.Close() 204 // Free resources 205 i.clearTermIters() 206 return err 207 } 208 209 // termsKeyIter needs to be a keyIterator and contains a terms iterator 210 var _ keyIterator = &termsKeyIter{} 211 212 type termsKeyIter struct { 213 iter segment.TermsIterator 214 segment segmentMetadata 215 } 216 217 func (i *termsKeyIter) Next() bool { 218 return i.iter.Next() 219 } 220 221 func (i *termsKeyIter) Current() []byte { 222 t, _ := i.iter.Current() 223 return t 224 } 225 226 func (i *termsKeyIter) Err() error { 227 return i.iter.Err() 228 } 229 230 func (i *termsKeyIter) Close() error { 231 return i.iter.Close() 232 }