github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/types.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package segment 22 23 import ( 24 "errors" 25 26 "github.com/m3db/m3/src/m3ninx/doc" 27 "github.com/m3db/m3/src/m3ninx/index" 28 "github.com/m3db/m3/src/m3ninx/postings" 29 ) 30 31 // ErrClosed is the error returned when attempting to perform operations on a 32 // segment that has already been closed. 33 var ErrClosed = errors.New("segment has been closed") 34 35 // Segment is a sub-collection of documents within an index. 36 type Segment interface { 37 // FieldsIterable returns a fields iterable, which is not 38 // safe for concurrent use. For concurrent use call FieldsIterable 39 // multiple times. 
40 FieldsIterable() FieldsIterable 41 42 // TermsIterable returns a terms iterable, which is not 43 // safe for concurrent use. For concurrent use call TermsIterable 44 // multiple times. 45 TermsIterable() TermsIterable 46 47 // Size returns the number of documents within the Segment. It returns 48 // 0 if the Segment has been closed. 49 Size() int64 50 51 // ContainsID returns a bool indicating if the Segment contains the provided ID. 52 ContainsID(docID []byte) (bool, error) 53 54 // ContainsField returns a bool indicating if the Segment contains the provided field. 55 ContainsField(field []byte) (bool, error) 56 57 // Reader returns a point-in-time accessor to search the segment. 58 Reader() (Reader, error) 59 60 // Close closes the segment and releases any internal resources. 61 Close() error 62 } 63 64 // Reader extends index reader interface to allow for reading 65 // of fields and terms. 66 type Reader interface { 67 index.Reader 68 FieldsIterable 69 TermsIterable 70 FieldsPostingsListIterable 71 72 // ContainsField returns a bool indicating if the Segment contains the provided field. 73 ContainsField(field []byte) (bool, error) 74 } 75 76 // FieldsIterable can iterate over segment fields, it is not by default 77 // concurrency safe. 78 type FieldsIterable interface { 79 // Fields returns an iterator over the list of known fields, in order 80 // by name, it is not valid for reading after mutating the 81 // builder by inserting more documents. 82 Fields() (FieldsIterator, error) 83 } 84 85 // FieldsPostingsListIterable can iterate over segment fields/postings lists, it is not by default 86 // concurrency safe. 87 type FieldsPostingsListIterable interface { 88 // FieldsPostingsList returns an iterator over the list of known fields 89 // and their postings lists, in order by name, it is not valid for reading after mutating the 90 // builder by inserting more documents. 
91 FieldsPostingsList() (FieldsPostingsListIterator, error) 92 } 93 94 // TermsIterable can iterate over segment terms, it is not by default 95 // concurrency safe. 96 type TermsIterable interface { 97 // Terms returns an iterator over the known terms values for the given 98 // field, in order by name, it is not valid for reading after mutating the 99 // builder by inserting more documents. 100 Terms(field []byte) (TermsIterator, error) 101 } 102 103 // OrderedBytesIterator iterates over a collection of []bytes in lexicographical order. 104 type OrderedBytesIterator interface { 105 // Next returns a bool indicating if there are any more elements. 106 Next() bool 107 108 // Current returns the current element. 109 // NB: the element returned is only valid until the subsequent call to Next(). 110 Current() []byte 111 112 // Err returns any errors encountered during iteration. 113 Err() error 114 115 // Close releases any resources held by the iterator. 116 Close() error 117 } 118 119 // FieldsPostingsListIterator iterates over all known fields and their associated postings lists. 120 type FieldsPostingsListIterator interface { 121 Iterator 122 123 // Current returns the current field and associated postings list. 124 // NB: the field returned is only valid until the subsequent call to Next(). 125 Current() ([]byte, postings.List) 126 } 127 128 // FieldsIterator iterates over all known fields. 129 type FieldsIterator interface { 130 Iterator 131 132 // Current returns the current field. 133 // NB: the field returned is only valid until the subsequent call to Next(). 134 Current() []byte 135 136 // Empty returns true if there are no fields in the iterator. 137 Empty() bool 138 } 139 140 // TermsIterator iterates over all known terms for the provided field. 141 type TermsIterator interface { 142 Iterator 143 144 // Current returns the current element. 145 // NB: the element returned is only valid until the subsequent call to Next(). 
146 Current() (term []byte, postings postings.List) 147 148 // Empty returns true if there are no terms. 149 Empty() bool 150 } 151 152 // Iterator holds common iterator methods. 153 type Iterator interface { 154 // Next returns a bool indicating if there are any more elements. 155 Next() bool 156 157 // Err returns any errors encountered during iteration. 158 Err() error 159 160 // Close releases any resources held by the iterator. 161 Close() error 162 } 163 164 // MutableSegment is a segment which can be updated. 165 type MutableSegment interface { 166 Segment 167 DocumentsBuilder 168 169 // Fields returns an iterator over the list of known fields, in order 170 // by name, it is not valid for reading after mutating the 171 // builder by inserting more documents. 172 Fields() (FieldsIterator, error) 173 174 // Seal marks the Mutable Segment immutable. 175 Seal() error 176 177 // IsSealed returns true iff the segment is open and sealed. NOTE(review): this comment previously said "un-sealed", which contradicts the method name; confirm against implementations. 178 IsSealed() bool 179 } 180 181 // ImmutableSegment is a segment that has been written to disk. 182 type ImmutableSegment interface { 183 Segment 184 185 FreeMmap() error 186 } 187 188 // Builder is a builder that can be used to construct segments. 189 type Builder interface { 190 FieldsPostingsListIterable 191 TermsIterable 192 193 // Reset resets the builder for reuse. 194 Reset() 195 196 // Docs returns the current docs slice, this is not safe to modify 197 // and is invalidated on a call to reset. 198 Docs() []doc.Metadata 199 200 // AllDocs returns an iterator over the documents known to the Builder. 201 AllDocs() (index.IDDocIterator, error) 202 } 203 204 // DocumentsBuilder is a builder that has documents written to it. 205 type DocumentsBuilder interface { 206 Builder 207 index.Writer 208 209 // SetIndexConcurrency sets the concurrency used for building the segment. 
210 SetIndexConcurrency(value int) 211 212 // IndexConcurrency returns the concurrency used for building the segment. 
213 IndexConcurrency() int 214 } 215 216 // CloseableDocumentsBuilder is a builder that has documents written to it and has freeable resources. 217 type CloseableDocumentsBuilder interface { 218 DocumentsBuilder 219 220 Close() error 221 } 222 223 // SegmentsBuilder is a builder that is built from segments. 224 type SegmentsBuilder interface { 225 Builder 226 227 // SetFilter sets a filter on which documents to retain 228 // when building the segment. 229 SetFilter(keep DocumentsFilter) 230 231 // AddSegments adds segments to build from. 232 AddSegments(segments []Segment) error 233 234 // SegmentMetadatas returns the segment builder segment metadata. 235 SegmentMetadatas() ([]SegmentsBuilderSegmentMetadata, error) 236 } 237 238 // SegmentsBuilderSegmentMetadata is a set of metadata about a segment 239 // that was used to build a compacted segment. 240 type SegmentsBuilderSegmentMetadata struct { 241 Segment Segment 242 Offset postings.ID 243 // NegativeOffsets is a lookup of document IDs that are duplicates or should be skipped, 244 // that is documents that are already contained by other segments or should 245 // not be included in the output segment and hence should not be returned 246 // when looking up documents. If this is the case offset is -1. 247 // If a document ID is not a duplicate or skipped then the offset is 248 // the shift that should be applied when translating this postings ID 249 // to the result postings ID. 250 NegativeOffsets []int64 251 Skips int64 252 } 253 254 // DocumentsFilter is a documents filter. 255 type DocumentsFilter interface { 256 // ContainsDoc is true if the document passes the filter. 257 ContainsDoc(d doc.Metadata) bool 258 // OnDuplicateDoc is a callback for when a duplicate document is 259 // encountered which is then removed from the resulting segment. 260 OnDuplicateDoc(d doc.Metadata) 261 }