github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/encoding/types.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package encoding
    22  
    23  import (
    24  	"time"
    25  
    26  	"github.com/m3db/m3/src/dbnode/namespace"
    27  	"github.com/m3db/m3/src/dbnode/ts"
    28  	"github.com/m3db/m3/src/dbnode/x/xio"
    29  	"github.com/m3db/m3/src/dbnode/x/xpool"
    30  	"github.com/m3db/m3/src/x/checked"
    31  	xcontext "github.com/m3db/m3/src/x/context"
    32  	"github.com/m3db/m3/src/x/ident"
    33  	"github.com/m3db/m3/src/x/pool"
    34  	"github.com/m3db/m3/src/x/serialize"
    35  	xtime "github.com/m3db/m3/src/x/time"
    36  )
    37  
    38  // Encoder is the generic interface implemented by the different types of encoders.
    39  type Encoder interface {
    40  	// SetSchema sets up the schema needed by a schema-aware encoder to encode the stream.
    41  	// SetSchema can be called multiple times between resets for mid-stream schema changes.
    42  	SetSchema(descr namespace.SchemaDescr)
    43  
    44  	// Encode encodes a datapoint and optionally an annotation.
    45  	// A schema must be set prior to Encode for schema-aware encoders. A schema can be set
    46  	// via Reset/DiscardReset/SetSchema.
    47  	Encode(dp ts.Datapoint, unit xtime.Unit, annotation ts.Annotation) error
    48  
    49  	// Stream is the streaming interface for reading encoded bytes in the encoder.
    50  	// A boolean is returned indicating whether the returned xio.SegmentReader contains
    51  	// any data (true) or is empty (false) to encourage callers to remember to handle
    52  	// the special case where there is an empty stream.
    53  	// NB(r): The underlying byte slice will not be returned to the pool until the context
    54  	// passed to this method is closed, so be sure to eventually call close on the
    55  	// context; otherwise the encoder's buffer is not returned to the pool when the
    56  	// encoder is completed.
    57  	Stream(ctx xcontext.Context) (xio.SegmentReader, bool)
    58  
    59  	// NumEncoded returns the number of encoded datapoints.
    60  	NumEncoded() int
    61  
    62  	// LastEncoded returns the last encoded datapoint, useful for
    63  	// de-duplicating encoded values. If there are no previously encoded values
    64  	// an error is returned.
    65  	LastEncoded() (ts.Datapoint, error)
    66  
    67  	// LastAnnotationChecksum returns the checksum of the last annotation, useful for
    68  	// de-duplicating encoded values. If there are no previously encoded values
    69  	// an error is returned.
    70  	LastAnnotationChecksum() (uint64, error)
    71  
    72  	// Empty returns true when the encoder is considered empty.
    73  	Empty() bool
    74  
    75  	// Len returns the length of the encoded stream as returned by a call to Stream().
    76  	Len() int
    77  
    78  	// Reset resets the start time of the encoder and its internal state.
    79  	// Reset also sets up the schema for schema-aware encoders such as proto encoders.
    80  	Reset(t xtime.UnixNano, capacity int, schema namespace.SchemaDescr)
    81  
    82  	// Close closes the encoder and, if pooled, returns it to the pool.
    83  	Close()
    84  
    85  	// Discard takes ownership of the encoder data, returning it as a ts.Segment, and if pooled returns the encoder to the pool.
    86  	Discard() ts.Segment
    87  
    88  	// DiscardReset takes ownership of the encoder data, returning it as a ts.Segment, and resets the encoder for reuse.
    89  	// DiscardReset also sets up the schema for schema-aware encoders such as proto encoders.
    90  	DiscardReset(t xtime.UnixNano, capacity int, schema namespace.SchemaDescr) ts.Segment
    91  }
    92  
    93  // NewEncoderFn is a constructor function that creates a new Encoder from a start time and a byte slice.
    94  type NewEncoderFn func(start time.Time, bytes []byte) Encoder
    95  
    96  // Options represents the different options for encoding time as well as markers.
    97  type Options interface {
    98  	// SetDefaultTimeUnit sets the default time unit for the encoder.
    99  	SetDefaultTimeUnit(tu xtime.Unit) Options
   100  
   101  	// DefaultTimeUnit returns the default time unit for the encoder.
   102  	DefaultTimeUnit() xtime.Unit
   103  
   104  	// SetTimeEncodingSchemes sets the time encoding schemes, keyed by time unit.
   105  	SetTimeEncodingSchemes(value map[xtime.Unit]TimeEncodingScheme) Options
   106  
   107  	// TimeEncodingSchemes returns the time encoding schemes for the different time units.
   108  	TimeEncodingSchemes() TimeEncodingSchemes
   109  
   110  	// SetMarkerEncodingScheme sets the marker encoding scheme.
   111  	SetMarkerEncodingScheme(value *MarkerEncodingScheme) Options
   112  
   113  	// MarkerEncodingScheme returns the marker encoding scheme.
   114  	MarkerEncodingScheme() *MarkerEncodingScheme
   115  
   116  	// SetEncoderPool sets the encoder pool.
   117  	SetEncoderPool(value EncoderPool) Options
   118  
   119  	// EncoderPool returns the encoder pool.
   120  	EncoderPool() EncoderPool
   121  
   122  	// SetReaderIteratorPool sets the ReaderIteratorPool.
   123  	SetReaderIteratorPool(value ReaderIteratorPool) Options
   124  
   125  	// ReaderIteratorPool returns the ReaderIteratorPool.
   126  	ReaderIteratorPool() ReaderIteratorPool
   127  
   128  	// SetBytesPool sets the bytes pool.
   129  	SetBytesPool(value pool.CheckedBytesPool) Options
   130  
   131  	// BytesPool returns the bytes pool.
   132  	BytesPool() pool.CheckedBytesPool
   133  
   134  	// SetSegmentReaderPool sets the segment reader pool.
   135  	SetSegmentReaderPool(value xio.SegmentReaderPool) Options
   136  
   137  	// SegmentReaderPool returns the segment reader pool.
   138  	SegmentReaderPool() xio.SegmentReaderPool
   139  
   140  	// SetCheckedBytesWrapperPool sets the checked bytes wrapper pool.
   141  	SetCheckedBytesWrapperPool(value xpool.CheckedBytesWrapperPool) Options
   142  
   143  	// CheckedBytesWrapperPool returns the checked bytes wrapper pool.
   144  	CheckedBytesWrapperPool() xpool.CheckedBytesWrapperPool
   145  
   146  	// SetByteFieldDictionaryLRUSize sets the ByteFieldDictionaryLRUSize, which controls
   147  	// how many recently seen byte field values will be maintained in the compression
   148  	// dictionary's LRU when compressing / decompressing byte fields in ProtoBuf messages.
   149  	// Increasing this value can potentially lead to better compression at the cost of
   150  	// using more memory for storing metadata when compressing / decompressing.
   151  	SetByteFieldDictionaryLRUSize(value int) Options
   152  
   153  	// ByteFieldDictionaryLRUSize returns the ByteFieldDictionaryLRUSize.
   154  	ByteFieldDictionaryLRUSize() int
   155  
   156  	// SetIStreamReaderSizeM3TSZ sets the IStream bufio reader size
   157  	// for m3tsz encoding iteration.
   158  	SetIStreamReaderSizeM3TSZ(value int) Options
   159  
   160  	// IStreamReaderSizeM3TSZ returns the IStream bufio reader size
   161  	// for m3tsz encoding iteration.
   162  	IStreamReaderSizeM3TSZ() int
   163  
   164  	// SetIStreamReaderSizeProto sets the IStream bufio reader size
   165  	// for proto encoding iteration.
   166  	SetIStreamReaderSizeProto(value int) Options
   167  
   168  	// IStreamReaderSizeProto returns the IStream bufio reader size
   169  	// for proto encoding iteration.
   170  	IStreamReaderSizeProto() int
   171  
   172  	// SetMetrics sets the encoding metrics.
   173  	SetMetrics(value Metrics) Options
   174  
   175  	// Metrics returns the encoding metrics.
   176  	Metrics() Metrics
   177  }
   178  
   179  // Iterator is the generic interface for iterating over encoded data.
   180  type Iterator interface {
   181  	// Next moves to the next item.
   182  	Next() bool
   183  
   184  	// Current returns the value, time unit and annotation associated with the
   185  	// current datapoint. Users should not hold on to the returned Annotation
   186  	// object as it may get invalidated when the iterator calls Next().
   187  	Current() (ts.Datapoint, xtime.Unit, ts.Annotation)
   188  
   189  	// Err returns the error encountered, if any.
   190  	Err() error
   191  
   192  	// Close closes the iterator and, if pooled, returns it to the pool.
   193  	Close()
   194  }
   195  
   196  // ReaderIterator is the interface for an Iterator that reads from a single reader.
   197  type ReaderIterator interface {
   198  	Iterator
   199  
   200  	// Reset resets the iterator to read from a new reader with
   201  	// a new schema (for schema-aware iterators).
   202  	Reset(reader xio.Reader64, schema namespace.SchemaDescr)
   203  }
   204  
   205  // MultiReaderIterator is an iterator that iterates in order over a list of
   206  // sets of readers that are each internally ordered but not collectively
   207  // in order; it also deduplicates datapoints.
   208  type MultiReaderIterator interface {
   209  	Iterator
   210  
   211  	// Reset resets the iterator to read from a slice of readers
   212  	// with a new schema (for schema-aware iterators).
   213  	Reset(readers []xio.SegmentReader, start xtime.UnixNano,
   214  		blockSize time.Duration, schema namespace.SchemaDescr)
   215  
   216  	// ResetSliceOfSlices resets the iterator to read from a slice of slice readers
   217  	// with a new schema (for schema-aware iterators).
   218  	ResetSliceOfSlices(
   219  		readers xio.ReaderSliceOfSlicesIterator,
   220  		schema namespace.SchemaDescr,
   221  	)
   222  
   223  	// Readers exposes the underlying ReaderSliceOfSlicesIterator
   224  	// for this MultiReaderIterator.
   225  	Readers() xio.ReaderSliceOfSlicesIterator
   226  
   227  	// Schema exposes the underlying SchemaDescr for this MultiReaderIterator.
   228  	Schema() namespace.SchemaDescr
   229  }
   230  
   231  // SeriesIteratorAccumulator is an accumulator for SeriesIterator iterators
   232  // that gathers incoming SeriesIterators and builds a unified SeriesIterator.
   233  type SeriesIteratorAccumulator interface {
   234  	SeriesIterator
   235  
   236  	// Add adds a series iterator to the accumulator.
   237  	Add(it SeriesIterator) error
   238  }
   239  
   240  // SeriesIterator is an iterator that iterates over a set of iterators from
   241  // different replicas and de-dupes & merges results from the replicas for a
   242  // given series, while also applying a time filter on top of the values in
   243  // case replicas returned values out of range on either end.
   244  type SeriesIterator interface {
   245  	Iterator
   246  
   247  	// ID gets the ID of the series.
   248  	ID() ident.ID
   249  
   250  	// Namespace gets the namespace of the series.
   251  	Namespace() ident.ID
   252  
   253  	// Start returns the start time filter specified for the iterator.
   254  	Start() xtime.UnixNano
   255  
   256  	// End returns the end time filter specified for the iterator.
   257  	End() xtime.UnixNano
   258  
   259  	// FirstAnnotation returns the value of the first annotation (disregarding the filter)
   260  	// on the underlying iterators. Only use after the first call to Next() has returned true.
   261  	// Consumers must make a copy of the returned slice as it will be invalidated by Reset.
   262  	FirstAnnotation() ts.Annotation
   263  
   264  	// Reset resets the iterator to read from a set of iterators from different
   265  	// replicas. Note that this can be an array with nil entries if
   266  	// some replicas did not return successfully.
   267  	// NB: the SeriesIterator assumes ownership of the provided ids; this
   268  	// includes calling `id.Finalize()` upon iter.Close().
   269  	Reset(opts SeriesIteratorOptions)
   270  
   271  	// IterateEqualTimestampStrategy returns the current strategy.
   272  	IterateEqualTimestampStrategy() IterateEqualTimestampStrategy
   273  
   274  	// SetIterateEqualTimestampStrategy sets the equal timestamp strategy, i.e. how
   275  	// to select a value when different replicas hold differing values for the
   276  	// same timestamp.
   277  	// It can be set at any time and will apply to the current value returned
   278  	// from the iterator immediately.
   279  	SetIterateEqualTimestampStrategy(strategy IterateEqualTimestampStrategy)
   280  
   281  	// Stats provides information for this SeriesIterator.
   282  	Stats() (SeriesIteratorStats, error)
   283  
   284  	// Replicas exposes the underlying MultiReaderIterator slice
   285  	// for this SeriesIterator.
   286  	Replicas() ([]MultiReaderIterator, error)
   287  
   288  	// Tags returns an iterator over the tags associated with the ID.
   289  	Tags() ident.TagIterator
   290  }
   291  
   292  // SeriesIteratorStats contains information about a SeriesIterator, as returned by SeriesIterator.Stats.
   293  type SeriesIteratorStats struct {
   294  	// ApproximateSizeInBytes approximates how much data is contained within the
   295  	// SeriesIterator, in bytes.
   296  	ApproximateSizeInBytes int
   297  }
   298  
   299  // SeriesIteratorConsolidator optionally defines methods to consolidate series iterators.
   300  type SeriesIteratorConsolidator interface {
   301  	// ConsolidateReplicas consolidates a slice of MultiReaderIterators, returning the consolidated slice or an error.
   302  	ConsolidateReplicas(replicas []MultiReaderIterator) ([]MultiReaderIterator, error)
   303  }
   304  
   305  // SeriesIteratorOptions is a set of options for using a series iterator; it is the argument to SeriesIterator.Reset.
   306  type SeriesIteratorOptions struct {
   307  	ID                            ident.ID                      // ID of the series.
   308  	Namespace                     ident.ID                      // Namespace of the series.
   309  	Tags                          ident.TagIterator             // Tags associated with the series ID.
   310  	Replicas                      []MultiReaderIterator         // Per-replica iterators; may contain nil entries (see SeriesIterator.Reset).
   311  	StartInclusive                xtime.UnixNano                // Start of the time filter (inclusive, per the field name).
   312  	EndExclusive                  xtime.UnixNano                // End of the time filter (exclusive, per the field name).
   313  	IterateEqualTimestampStrategy IterateEqualTimestampStrategy // How to select a value when replicas differ at the same timestamp.
   314  	SeriesIteratorConsolidator    SeriesIteratorConsolidator    // Optional consolidator for replicas. NOTE(review): presumably may be nil — confirm.
   315  }
   316  
   317  // SeriesIterators is a collection of SeriesIterator values that can
   318  // close all of its iterators at once.
   319  type SeriesIterators interface {
   320  	// Iters returns the slice of series iterators.
   321  	Iters() []SeriesIterator
   322  
   323  	// Len returns the count of iterators in the collection.
   324  	Len() int
   325  
   326  	// Close closes all iterators contained within the collection.
   327  	Close()
   328  }
   329  
   330  // MutableSeriesIterators is a SeriesIterators collection that can also be resized and populated.
   331  type MutableSeriesIterators interface {
   332  	SeriesIterators
   333  
   334  	// Reset resets the iters collection to the given size for reuse.
   335  	Reset(size int)
   336  
   337  	// SetAt sets the SeriesIterator at the given index.
   338  	SetAt(idx int, iter SeriesIterator)
   339  }
   340  
   341  // Decoder is the generic interface implemented by the different types of decoders.
   342  type Decoder interface {
   343  	// Decode decodes the encoded data in the reader, returning a ReaderIterator over it.
   344  	Decode(reader xio.Reader64) ReaderIterator
   345  }
   346  
   347  // NewDecoderFn is a constructor function that creates a new Decoder.
   348  type NewDecoderFn func() Decoder
   349  
   350  // EncoderAllocate is a function that allocates an Encoder for a pool (see EncoderPool.Init).
   351  type EncoderAllocate func() Encoder
   352  
   353  // ReaderIteratorAllocate is a function that allocates a ReaderIterator for a pool, given a reader and a schema.
   354  type ReaderIteratorAllocate func(reader xio.Reader64, descr namespace.SchemaDescr) ReaderIterator
   355  
   356  // OStream encapsulates a writable stream that supports bit-level and byte-level writes.
   357  type OStream interface {
   358  	// Len returns the length of the OStream.
   359  	Len() int
   360  	// Empty returns whether the OStream is empty.
   361  	Empty() bool
   362  
   363  	// WriteBit writes the last bit of v.
   364  	WriteBit(v Bit)
   365  
   366  	// WriteBits writes the lowest numBits of v to the stream, starting
   367  	// from the most significant bit to the least significant bit.
   368  	WriteBits(v uint64, numBits int)
   369  
   370  	// WriteByte writes the byte v.
   371  	WriteByte(v byte)
   372  
   373  	// WriteBytes writes a byte slice.
   374  	WriteBytes(bytes []byte)
   375  
   376  	// Write writes a byte slice. This method exists in addition to WriteBytes()
   377  	// to satisfy the io.Writer interface.
   378  	Write(bytes []byte) (int, error)
   379  
   380  	// Reset resets the ostream to use the given buffer.
   381  	Reset(buffer checked.Bytes)
   382  
   383  	// Discard takes the ref to the checked bytes from the OStream and returns it.
   384  	Discard() checked.Bytes
   385  
   386  	// RawBytes returns the OStream's raw bytes. Note that this does not transfer
   387  	// ownership of the data and bypasses the checked.Bytes accounting, so
   388  	// callers should:
   389  	//     1. Only use the returned slice as a "read-only" snapshot of the
   390  	//        data in a context where the caller has at least a read lock
   391  	//        on the ostream itself.
   392  	//     2. Use this function with care.
   393  	RawBytes() ([]byte, int)
   394  
   395  	// CheckedBytes returns the written stream as checked bytes.
   396  	CheckedBytes() (checked.Bytes, int)
   397  }
   398  
   399  // EncoderPool provides a pool for encoders.
   400  type EncoderPool interface {
   401  	// Init initializes the pool with the given encoder allocator.
   402  	Init(alloc EncoderAllocate)
   403  
   404  	// Get provides an encoder from the pool.
   405  	Get() Encoder
   406  
   407  	// Put returns an encoder to the pool.
   408  	Put(e Encoder)
   409  }
   410  
   411  // ReaderIteratorPool provides a pool for ReaderIterators.
   412  type ReaderIteratorPool interface {
   413  	// Init initializes the pool with the given ReaderIterator allocator.
   414  	Init(alloc ReaderIteratorAllocate)
   415  
   416  	// Get provides a ReaderIterator from the pool.
   417  	Get() ReaderIterator
   418  
   419  	// Put returns a ReaderIterator to the pool.
   420  	Put(iter ReaderIterator)
   421  }
   422  
   423  // MultiReaderIteratorPool provides a pool for MultiReaderIterators.
   424  type MultiReaderIteratorPool interface {
   425  	// Init initializes the pool with the given ReaderIterator allocator.
   426  	Init(alloc ReaderIteratorAllocate)
   427  
   428  	// Get provides a MultiReaderIterator from the pool.
   429  	Get() MultiReaderIterator
   430  
   431  	// Put returns a MultiReaderIterator to the pool.
   432  	Put(iter MultiReaderIterator)
   433  }
   434  
   435  // SeriesIteratorPool provides a pool for SeriesIterators.
   436  type SeriesIteratorPool interface {
   437  	// Init initializes the pool.
   438  	Init()
   439  
   440  	// Get provides a SeriesIterator from the pool.
   441  	Get() SeriesIterator
   442  
   443  	// Put returns a SeriesIterator to the pool.
   444  	Put(iter SeriesIterator)
   445  }
   446  
   447  // MultiReaderIteratorArrayPool provides a pool for MultiReaderIterator slices.
   448  type MultiReaderIteratorArrayPool interface {
   449  	// Init initializes the pool.
   450  	Init()
   451  
   452  	// Get provides a MultiReaderIterator slice from the pool for the requested size.
   453  	Get(size int) []MultiReaderIterator
   454  
   455  	// Put returns a MultiReaderIterator slice to the pool.
   456  	Put(iters []MultiReaderIterator)
   457  }
   458  
   459  // IteratorPools exposes a small subset of iterator pools that is sufficient
   460  // for clients to rebuild a SeriesIterator.
   461  type IteratorPools interface {
   462  	// MultiReaderIteratorArray exposes the session MultiReaderIteratorArrayPool.
   463  	MultiReaderIteratorArray() MultiReaderIteratorArrayPool
   464  
   465  	// MultiReaderIterator exposes the session MultiReaderIteratorPool.
   466  	MultiReaderIterator() MultiReaderIteratorPool
   467  
   468  	// SeriesIterator exposes the session SeriesIteratorPool.
   469  	SeriesIterator() SeriesIteratorPool
   470  
   471  	// CheckedBytesWrapper exposes the session CheckedBytesWrapperPool.
   472  	CheckedBytesWrapper() xpool.CheckedBytesWrapperPool
   473  
   474  	// ID exposes the session identity pool.
   475  	ID() ident.Pool
   476  
   477  	// TagEncoder exposes the session tag encoder pool.
   478  	TagEncoder() serialize.TagEncoderPool
   479  
   480  	// TagDecoder exposes the session tag decoder pool.
   481  	TagDecoder() serialize.TagDecoderPool
   482  }