github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/encoding/types.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package encoding 22 23 import ( 24 "time" 25 26 "github.com/m3db/m3/src/dbnode/namespace" 27 "github.com/m3db/m3/src/dbnode/ts" 28 "github.com/m3db/m3/src/dbnode/x/xio" 29 "github.com/m3db/m3/src/dbnode/x/xpool" 30 "github.com/m3db/m3/src/x/checked" 31 xcontext "github.com/m3db/m3/src/x/context" 32 "github.com/m3db/m3/src/x/ident" 33 "github.com/m3db/m3/src/x/pool" 34 "github.com/m3db/m3/src/x/serialize" 35 xtime "github.com/m3db/m3/src/x/time" 36 ) 37 38 // Encoder is the generic interface for different types of encoders. 39 type Encoder interface { 40 // SetSchema sets up the schema needed by schema-aware encoder to encode the stream. 41 // SetSchema can be called multiple times between reset for mid-stream schema changes. 42 SetSchema(descr namespace.SchemaDescr) 43 44 // Encode encodes a datapoint and optionally an annotation. 45 // Schema must be set prior to Encode for schema-aware encoder. A schema can be set 46 // via Reset/DiscardReset/SetSchema. 47 Encode(dp ts.Datapoint, unit xtime.Unit, annotation ts.Annotation) error 48 49 // Stream is the streaming interface for reading encoded bytes in the encoder. 50 // A boolean is returned indicating whether the returned xio.SegmentReader contains 51 // any data (true) or is empty (false) to encourage callers to remember to handle 52 // the special case where there is an empty stream. 53 // NB(r): The underlying byte slice will not be returned to the pool until the context 54 // passed to this method is closed, so to avoid not returning the 55 // encoder's buffer back to the pool when it is completed be sure to call 56 // close on the context eventually. 57 Stream(ctx xcontext.Context) (xio.SegmentReader, bool) 58 59 // NumEncoded returns the number of encoded datapoints. 60 NumEncoded() int 61 62 // LastEncoded returns the last encoded datapoint, useful for 63 // de-duplicating encoded values. If there are no previously encoded values 64 // an error is returned. 65 LastEncoded() (ts.Datapoint, error) 66 67 // LastAnnotationChecksum returns the checksum of the last annotation, useful for 68 // de-duplicating encoded values. If there are no previously encoded values 69 // an error is returned. 70 LastAnnotationChecksum() (uint64, error) 71 72 // Empty returns true when encoder is considered empty. 73 Empty() bool 74 75 // Len returns the length of the encoded stream as returned by a call to Stream(). 76 Len() int 77 78 // Reset resets the start time of the encoder and the internal state. 79 // Reset sets up the schema for schema-aware encoders such as proto encoders. 80 Reset(t xtime.UnixNano, capacity int, schema namespace.SchemaDescr) 81 82 // Close closes the encoder and if pooled will return it to the pool. 83 Close() 84 85 // Discard will take ownership of the encoder data and if pooled will return the encoder to the pool. 86 Discard() ts.Segment 87 88 // DiscardReset will take ownership of the encoder data and reset the encoder for reuse. 89 // DiscardReset sets up the schema for schema-aware encoders such as proto encoders. 90 DiscardReset(t xtime.UnixNano, capacity int, schema namespace.SchemaDescr) ts.Segment 91 } 92 93 // NewEncoderFn creates a new encoder. 94 type NewEncoderFn func(start time.Time, bytes []byte) Encoder 95 96 // Options represents different options for encoding time as well as markers. 97 type Options interface { 98 // SetDefaultTimeUnit sets the default time unit for the encoder. 99 SetDefaultTimeUnit(tu xtime.Unit) Options 100 101 // DefaultTimeUnit returns the default time unit for the encoder. 102 DefaultTimeUnit() xtime.Unit 103 104 // SetTimeEncodingSchemes sets the time encoding schemes for different time units. 105 SetTimeEncodingSchemes(value map[xtime.Unit]TimeEncodingScheme) Options 106 107 // TimeEncodingSchemes returns the time encoding schemes for different time units. 108 TimeEncodingSchemes() TimeEncodingSchemes 109 110 // SetMarkerEncodingScheme sets the marker encoding scheme. 111 SetMarkerEncodingScheme(value *MarkerEncodingScheme) Options 112 113 // MarkerEncodingScheme returns the marker encoding scheme. 114 MarkerEncodingScheme() *MarkerEncodingScheme 115 116 // SetEncoderPool sets the encoder pool. 117 SetEncoderPool(value EncoderPool) Options 118 119 // EncoderPool returns the encoder pool. 120 EncoderPool() EncoderPool 121 122 // SetReaderIteratorPool sets the ReaderIteratorPool. 123 SetReaderIteratorPool(value ReaderIteratorPool) Options 124 125 // ReaderIteratorPool returns the ReaderIteratorPool. 126 ReaderIteratorPool() ReaderIteratorPool 127 128 // SetBytesPool sets the bytes pool. 129 SetBytesPool(value pool.CheckedBytesPool) Options 130 131 // BytesPool returns the bytes pool. 132 BytesPool() pool.CheckedBytesPool 133 134 // SetSegmentReaderPool sets the segment reader pool. 135 SetSegmentReaderPool(value xio.SegmentReaderPool) Options 136 137 // SegmentReaderPool returns the segment reader pool. 138 SegmentReaderPool() xio.SegmentReaderPool 139 140 // SetCheckedBytesWrapperPool sets the checked bytes wrapper pool. 141 SetCheckedBytesWrapperPool(value xpool.CheckedBytesWrapperPool) Options 142 143 // CheckedBytesWrapperPool returns the checked bytes wrapper pool. 144 CheckedBytesWrapperPool() xpool.CheckedBytesWrapperPool 145 146 // SetByteFieldDictionaryLRUSize sets theByteFieldDictionaryLRUSize which controls 147 // how many recently seen byte field values will be maintained in the compression 148 // dictionaries LRU when compressing / decompressing byte fields in ProtoBuf messages. 149 // Increasing this value can potentially lead to better compression at the cost of 150 // using more memory for storing metadata when compressing / decompressing. 151 SetByteFieldDictionaryLRUSize(value int) Options 152 153 // ByteFieldDictionaryLRUSize returns the ByteFieldDictionaryLRUSize. 154 ByteFieldDictionaryLRUSize() int 155 156 // SetIStreamReaderSizeM3TSZ sets the IStream bufio reader size 157 // for m3tsz encoding iteration. 158 SetIStreamReaderSizeM3TSZ(value int) Options 159 160 // IStreamReaderSizeM3TSZ returns the IStream bufio reader size 161 // for m3tsz encoding iteration. 162 IStreamReaderSizeM3TSZ() int 163 164 // SetIStreamReaderSizeProto sets the IStream bufio reader size 165 // for proto encoding iteration. 166 SetIStreamReaderSizeProto(value int) Options 167 168 // IStreamReaderSizeProto returns the IStream bufio reader size 169 // for proto encoding iteration. 170 IStreamReaderSizeProto() int 171 172 // SetMetrics sets the encoding metrics. 173 SetMetrics(value Metrics) Options 174 175 // Metrics returns the encoding metrics. 176 Metrics() Metrics 177 } 178 179 // Iterator is the generic interface for iterating over encoded data. 180 type Iterator interface { 181 // Next moves to the next item. 182 Next() bool 183 184 // Current returns the value as well as the annotation associated with the 185 // current datapoint. Users should not hold on to the returned Annotation 186 // object as it may get invalidated when the iterator calls Next(). 187 Current() (ts.Datapoint, xtime.Unit, ts.Annotation) 188 189 // Err returns the error encountered. 190 Err() error 191 192 // Close closes the iterator and if pooled will return to the pool. 193 Close() 194 } 195 196 // ReaderIterator is the interface for a single-reader iterator. 197 type ReaderIterator interface { 198 Iterator 199 200 // Reset resets the iterator to read from a new reader with 201 // a new schema (for schema aware iterators). 202 Reset(reader xio.Reader64, schema namespace.SchemaDescr) 203 } 204 205 // MultiReaderIterator is an iterator that iterates in order over 206 // a list of sets of internally ordered but not collectively in order 207 // readers, it also deduplicates datapoints. 208 type MultiReaderIterator interface { 209 Iterator 210 211 // Reset resets the iterator to read from a slice of readers 212 // with a new schema (for schema aware iterators). 213 Reset(readers []xio.SegmentReader, start xtime.UnixNano, 214 blockSize time.Duration, schema namespace.SchemaDescr) 215 216 // ResetSliceOfSlices resets the iterator to read from a slice of slice readers 217 // with a new schema (for schema aware iterators). 218 ResetSliceOfSlices( 219 readers xio.ReaderSliceOfSlicesIterator, 220 schema namespace.SchemaDescr, 221 ) 222 223 // Readers exposes the underlying ReaderSliceOfSlicesIterator 224 // for this MultiReaderIterator. 225 Readers() xio.ReaderSliceOfSlicesIterator 226 227 // Schema exposes the underlying SchemaDescr for this MultiReaderIterator. 228 Schema() namespace.SchemaDescr 229 } 230 231 // SeriesIteratorAccumulator is an accumulator for SeriesIterator iterators, 232 // that gathers incoming SeriesIterators and builds a unified SeriesIterator. 233 type SeriesIteratorAccumulator interface { 234 SeriesIterator 235 236 // Add adds a series iterator. 237 Add(it SeriesIterator) error 238 } 239 240 // SeriesIterator is an iterator that iterates over a set of iterators from 241 // different replicas and de-dupes & merges results from the replicas for a 242 // given series while also applying a time filter on top of the values in 243 // case replicas returned values out of range on either end. 244 type SeriesIterator interface { 245 Iterator 246 247 // ID gets the ID of the series. 248 ID() ident.ID 249 250 // Namespace gets the namespace of the series. 251 Namespace() ident.ID 252 253 // Start returns the start time filter specified for the iterator. 254 Start() xtime.UnixNano 255 256 // End returns the end time filter specified for the iterator. 257 End() xtime.UnixNano 258 259 // FirstAnnotation returns the value of the first annotation (disregarding the filter) 260 // on the underlying iterators. Only use after the first call to Next() has returned true. 261 // Consumers must make a copy of the returned slice as it will be invalidated by Reset. 262 FirstAnnotation() ts.Annotation 263 264 // Reset resets the iterator to read from a set of iterators from different 265 // replicas, one must note that this can be an array with nil entries if 266 // some replicas did not return successfully. 267 // NB: the SeriesIterator assumes ownership of the provided ids, this 268 // includes calling `id.Finalize()` upon iter.Close(). 269 Reset(opts SeriesIteratorOptions) 270 271 // IterateEqualTimestampStrategy returns the current strategy. 272 IterateEqualTimestampStrategy() IterateEqualTimestampStrategy 273 274 // SetIterateEqualTimestampStrategy sets the equal timestamp strategy of how 275 // to select a value when the timestamp matches differing values with the same 276 // timestamp from different replicas. 277 // It can be set at any time and will apply to the current value returned 278 // from the iterator immediately. 279 SetIterateEqualTimestampStrategy(strategy IterateEqualTimestampStrategy) 280 281 // Stats provides information for this SeriesIterator. 282 Stats() (SeriesIteratorStats, error) 283 284 // Replicas exposes the underlying MultiReaderIterator slice 285 // for this SeriesIterator. 286 Replicas() ([]MultiReaderIterator, error) 287 288 // Tags returns an iterator over the tags associated with the ID. 289 Tags() ident.TagIterator 290 } 291 292 // SeriesIteratorStats contains information about a SeriesIterator. 293 type SeriesIteratorStats struct { 294 // ApproximateSizeInBytes approximates how much data is contained within the 295 // SeriesIterator, in bytes. 296 ApproximateSizeInBytes int 297 } 298 299 // SeriesIteratorConsolidator optionally defines methods to consolidate series iterators. 300 type SeriesIteratorConsolidator interface { 301 // ConsolidateReplicas consolidates MultiReaderIterator slices. 302 ConsolidateReplicas(replicas []MultiReaderIterator) ([]MultiReaderIterator, error) 303 } 304 305 // SeriesIteratorOptions is a set of options for using a series iterator. 306 type SeriesIteratorOptions struct { 307 ID ident.ID 308 Namespace ident.ID 309 Tags ident.TagIterator 310 Replicas []MultiReaderIterator 311 StartInclusive xtime.UnixNano 312 EndExclusive xtime.UnixNano 313 IterateEqualTimestampStrategy IterateEqualTimestampStrategy 314 SeriesIteratorConsolidator SeriesIteratorConsolidator 315 } 316 317 // SeriesIterators is a collection of SeriesIterator that can 318 // close all iterators. 319 type SeriesIterators interface { 320 // Iters returns the array of series iterators. 321 Iters() []SeriesIterator 322 323 // Len returns the count of iterators in the collection. 324 Len() int 325 326 // Close closes all iterators contained within the collection. 327 Close() 328 } 329 330 // MutableSeriesIterators is a mutable SeriesIterators. 331 type MutableSeriesIterators interface { 332 SeriesIterators 333 334 // Reset the iters collection to a size for reuse. 335 Reset(size int) 336 337 // SetAt sets a SeriesIterator to the given index. 338 SetAt(idx int, iter SeriesIterator) 339 } 340 341 // Decoder is the generic interface for different types of decoders. 342 type Decoder interface { 343 // Decode decodes the encoded data in the reader. 344 Decode(reader xio.Reader64) ReaderIterator 345 } 346 347 // NewDecoderFn creates a new decoder. 348 type NewDecoderFn func() Decoder 349 350 // EncoderAllocate allocates an encoder for a pool. 351 type EncoderAllocate func() Encoder 352 353 // ReaderIteratorAllocate allocates a ReaderIterator for a pool. 354 type ReaderIteratorAllocate func(reader xio.Reader64, descr namespace.SchemaDescr) ReaderIterator 355 356 // OStream encapsulates a writable stream. 357 type OStream interface { 358 // Len returns the length of the OStream. 359 Len() int 360 // Empty returns whether the OStream is empty. 361 Empty() bool 362 363 // WriteBit writes the last bit of v. 364 WriteBit(v Bit) 365 366 // WriteBits writes the lowest numBits of v to the stream, starting 367 // from the most significant bit to the least significant bit. 368 WriteBits(v uint64, numBits int) 369 370 // WriteByte writes the last byte of v. 371 WriteByte(v byte) 372 373 // WriteBytes writes a byte slice. 374 WriteBytes(bytes []byte) 375 376 // Write writes a byte slice. This method exists in addition to WriteBytes() 377 // to satisfy the io.Writer interface. 378 Write(bytes []byte) (int, error) 379 380 // Reset resets the ostream. 381 Reset(buffer checked.Bytes) 382 383 // Discard takes the ref to the checked bytes from the OStream. 384 Discard() checked.Bytes 385 386 // RawBytes returns the OStream's raw bytes. Note that this does not transfer 387 // ownership of the data and bypasses the checked.Bytes accounting so 388 // callers should: 389 // 1. Only use the returned slice as a "read-only" snapshot of the 390 // data in a context where the caller has at least a read lock 391 // on the ostream itself. 392 // 2. Use this function with care. 393 RawBytes() ([]byte, int) 394 395 // CheckedBytes returns the written stream as checked bytes. 396 CheckedBytes() (checked.Bytes, int) 397 } 398 399 // EncoderPool provides a pool for encoders. 400 type EncoderPool interface { 401 // Init initializes the pool. 402 Init(alloc EncoderAllocate) 403 404 // Get provides an encoder from the pool. 405 Get() Encoder 406 407 // Put returns an encoder to the pool. 408 Put(e Encoder) 409 } 410 411 // ReaderIteratorPool provides a pool for ReaderIterators. 412 type ReaderIteratorPool interface { 413 // Init initializes the pool. 414 Init(alloc ReaderIteratorAllocate) 415 416 // Get provides a ReaderIterator from the pool. 417 Get() ReaderIterator 418 419 // Put returns a ReaderIterator to the pool. 420 Put(iter ReaderIterator) 421 } 422 423 // MultiReaderIteratorPool provides a pool for MultiReaderIterators. 424 type MultiReaderIteratorPool interface { 425 // Init initializes the pool. 426 Init(alloc ReaderIteratorAllocate) 427 428 // Get provides a MultiReaderIterator from the pool. 429 Get() MultiReaderIterator 430 431 // Put returns a MultiReaderIterator to the pool. 432 Put(iter MultiReaderIterator) 433 } 434 435 // SeriesIteratorPool provides a pool for SeriesIterator. 436 type SeriesIteratorPool interface { 437 // Init initializes the pool. 438 Init() 439 440 // Get provides a SeriesIterator from the pool. 441 Get() SeriesIterator 442 443 // Put returns a SeriesIterator to the pool. 444 Put(iter SeriesIterator) 445 } 446 447 // MultiReaderIteratorArrayPool provides a pool for MultiReaderIterator arrays. 448 type MultiReaderIteratorArrayPool interface { 449 // Init initializes the pool. 450 Init() 451 452 // Get provides a MultiReaderIterator array from the pool. 453 Get(size int) []MultiReaderIterator 454 455 // Put returns a MultiReaderIterator array to the pool. 456 Put(iters []MultiReaderIterator) 457 } 458 459 // IteratorPools exposes a small subset of iterator pools that are sufficient 460 // for clients to rebuild SeriesIterator. 461 type IteratorPools interface { 462 // MultiReaderIteratorArray exposes the session MultiReaderIteratorArrayPool. 463 MultiReaderIteratorArray() MultiReaderIteratorArrayPool 464 465 // MultiReaderIterator exposes the session MultiReaderIteratorPool. 466 MultiReaderIterator() MultiReaderIteratorPool 467 468 // SeriesIterator exposes the session SeriesIteratorPool. 469 SeriesIterator() SeriesIteratorPool 470 471 // CheckedBytesWrapper exposes the session CheckedBytesWrapperPool. 472 CheckedBytesWrapper() xpool.CheckedBytesWrapperPool 473 474 // ID exposes the session identity pool. 475 ID() ident.Pool 476 477 // TagEncoder exposes the session tag encoder pool. 478 TagEncoder() serialize.TagEncoderPool 479 480 // TagDecoder exposes the session tag decoder pool. 481 TagDecoder() serialize.TagDecoderPool 482 }