github.com/m3db/m3@v1.5.0/src/dbnode/encoding/m3tsz/encoder.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package m3tsz
    22  
    23  import (
    24  	"errors"
    25  	"math"
    26  
    27  	"github.com/m3db/m3/src/dbnode/encoding"
    28  	"github.com/m3db/m3/src/dbnode/namespace"
    29  	"github.com/m3db/m3/src/dbnode/ts"
    30  	"github.com/m3db/m3/src/dbnode/x/xio"
    31  	"github.com/m3db/m3/src/x/checked"
    32  	"github.com/m3db/m3/src/x/context"
    33  	xtime "github.com/m3db/m3/src/x/time"
    34  )
    35  
// Sentinel errors returned by the encoder: errEncoderClosed is returned by
// Encode once the encoder has been closed, and errNoEncodedDatapoints is
// returned by LastEncoded and LastAnnotationChecksum before any datapoint
// has been encoded.
var (
	errEncoderClosed       = errors.New("encoder is closed")
	errNoEncodedDatapoints = errors.New("encoder has no encoded datapoints")
)
    40  
// encoder is an M3TSZ encoder that can encode a stream of data in M3TSZ format.
// Timestamps are written through tsEncoderState; values are written either as
// floats (floatEnc) or, when intOptimized is set, as int diffs (sigTracker).
type encoder struct {
	os                   encoding.OStream
	opts                 encoding.Options
	markerEncodingScheme *encoding.MarkerEncodingScheme

	// internal bookkeeping
	tsEncoderState TimestampEncoder
	floatEnc       FloatEncoderAndIterator
	sigTracker     IntSigBitsTracker

	ant ts.Annotation // current annotation (within this file it is only cleared by reset)

	intVal     float64 // last value written in int mode, as returned by convertToIntFloat
	numEncoded uint32  // number of datapoints successfully encoded so far
	maxMult    uint8   // current max multiplier for int vals

	intOptimized bool // whether the encoding scheme is optimized for ints
	isFloat      bool // true while the int-optimized stream is in float mode; never set when intOptimized is false
	closed       bool // set by Close; Encode rejects writes while true
}
    62  
    63  // NewEncoder creates a new encoder.
    64  func NewEncoder(
    65  	start xtime.UnixNano,
    66  	bytes checked.Bytes,
    67  	intOptimized bool,
    68  	opts encoding.Options,
    69  ) encoding.Encoder {
    70  	if opts == nil {
    71  		opts = encoding.NewOptions()
    72  	}
    73  	// NB(r): only perform an initial allocation if there is no pool that
    74  	// will be used for this encoder.  If a pool is being used alloc when the
    75  	// `Reset` method is called.
    76  	initAllocIfEmpty := opts.EncoderPool() == nil
    77  	return &encoder{
    78  		os:                   encoding.NewOStream(bytes, initAllocIfEmpty, opts.BytesPool()),
    79  		opts:                 opts,
    80  		markerEncodingScheme: opts.MarkerEncodingScheme(),
    81  		tsEncoderState:       NewTimestampEncoder(start, opts.DefaultTimeUnit(), opts),
    82  		closed:               false,
    83  		intOptimized:         intOptimized,
    84  	}
    85  }
    86  
    87  func (enc *encoder) SetSchema(descr namespace.SchemaDescr) {}
    88  
    89  // Encode encodes the timestamp and the value of a datapoint.
    90  func (enc *encoder) Encode(dp ts.Datapoint, tu xtime.Unit, ant ts.Annotation) error {
    91  	if enc.closed {
    92  		return errEncoderClosed
    93  	}
    94  
    95  	err := enc.tsEncoderState.WriteTime(enc.os, dp.TimestampNanos, ant, tu)
    96  	if err != nil {
    97  		return err
    98  	}
    99  
   100  	if enc.numEncoded == 0 {
   101  		err = enc.writeFirstValue(dp.Value)
   102  	} else {
   103  		err = enc.writeNextValue(dp.Value)
   104  	}
   105  	if err == nil {
   106  		enc.numEncoded++
   107  	}
   108  
   109  	return err
   110  }
   111  
   112  func (enc *encoder) writeFirstValue(v float64) error {
   113  	if !enc.intOptimized {
   114  		enc.floatEnc.writeFullFloat(enc.os, math.Float64bits(v))
   115  		return nil
   116  	}
   117  
   118  	// Attempt to convert float to int for int optimization
   119  	val, mult, isFloat, err := convertToIntFloat(v, 0)
   120  	if err != nil {
   121  		return err
   122  	}
   123  
   124  	if isFloat {
   125  		enc.os.WriteBit(opcodeFloatMode)
   126  		enc.floatEnc.writeFullFloat(enc.os, math.Float64bits(v))
   127  		enc.isFloat = true
   128  		enc.maxMult = mult
   129  		return nil
   130  	}
   131  
   132  	// val can be converted to int
   133  	enc.os.WriteBit(opcodeIntMode)
   134  	enc.intVal = val
   135  	negDiff := true
   136  	if val < 0 {
   137  		negDiff = false
   138  		val = -1 * val
   139  	}
   140  
   141  	valBits := uint64(int64(val))
   142  	numSig := encoding.NumSig(valBits)
   143  	enc.writeIntSigMult(numSig, mult, false)
   144  	enc.sigTracker.WriteIntValDiff(enc.os, valBits, negDiff)
   145  	return nil
   146  }
   147  
   148  func (enc *encoder) writeNextValue(v float64) error {
   149  	if !enc.intOptimized {
   150  		enc.floatEnc.writeNextFloat(enc.os, math.Float64bits(v))
   151  		return nil
   152  	}
   153  
   154  	// Attempt to convert float to int for int optimization
   155  	val, mult, isFloat, err := convertToIntFloat(v, enc.maxMult)
   156  	if err != nil {
   157  		return err
   158  	}
   159  
   160  	var valDiff float64
   161  	if !isFloat {
   162  		valDiff = enc.intVal - val
   163  	}
   164  
   165  	if isFloat || valDiff >= maxInt || valDiff <= minInt {
   166  		enc.writeFloatVal(math.Float64bits(val), mult)
   167  		return nil
   168  	}
   169  
   170  	enc.writeIntVal(val, mult, isFloat, valDiff)
   171  	return nil
   172  }
   173  
   174  // writeFloatVal writes the value as XOR of the
   175  // bits that represent the float
   176  func (enc *encoder) writeFloatVal(val uint64, mult uint8) {
   177  	if !enc.isFloat {
   178  		// Converting from int to float
   179  		enc.os.WriteBit(opcodeUpdate)
   180  		enc.os.WriteBit(opcodeNoRepeat)
   181  		enc.os.WriteBit(opcodeFloatMode)
   182  		enc.floatEnc.writeFullFloat(enc.os, val)
   183  		enc.isFloat = true
   184  		enc.maxMult = mult
   185  		return
   186  	}
   187  
   188  	if val == enc.floatEnc.PrevFloatBits {
   189  		// Value is repeated
   190  		enc.os.WriteBit(opcodeUpdate)
   191  		enc.os.WriteBit(opcodeRepeat)
   192  		return
   193  	}
   194  
   195  	enc.os.WriteBit(opcodeNoUpdate)
   196  	enc.floatEnc.writeNextFloat(enc.os, val)
   197  }
   198  
   199  // writeIntVal writes the val as a diff of ints
   200  func (enc *encoder) writeIntVal(val float64, mult uint8, isFloat bool, valDiff float64) {
   201  	if valDiff == 0 && isFloat == enc.isFloat && mult == enc.maxMult {
   202  		// Value is repeated
   203  		enc.os.WriteBit(opcodeUpdate)
   204  		enc.os.WriteBit(opcodeRepeat)
   205  		return
   206  	}
   207  
   208  	neg := false
   209  	if valDiff < 0 {
   210  		neg = true
   211  		valDiff = -1 * valDiff
   212  	}
   213  
   214  	valDiffBits := uint64(int64(valDiff))
   215  	numSig := encoding.NumSig(valDiffBits)
   216  	newSig := enc.sigTracker.TrackNewSig(numSig)
   217  	isFloatChanged := isFloat != enc.isFloat
   218  	if mult > enc.maxMult || enc.sigTracker.NumSig != newSig || isFloatChanged {
   219  		enc.os.WriteBit(opcodeUpdate)
   220  		enc.os.WriteBit(opcodeNoRepeat)
   221  		enc.os.WriteBit(opcodeIntMode)
   222  		enc.writeIntSigMult(newSig, mult, isFloatChanged)
   223  		enc.sigTracker.WriteIntValDiff(enc.os, valDiffBits, neg)
   224  		enc.isFloat = false
   225  	} else {
   226  		enc.os.WriteBit(opcodeNoUpdate)
   227  		enc.sigTracker.WriteIntValDiff(enc.os, valDiffBits, neg)
   228  	}
   229  
   230  	enc.intVal = val
   231  }
   232  
   233  // writeIntSigMult writes the number of significant
   234  // bits of the diff and the multiplier if they have changed
   235  func (enc *encoder) writeIntSigMult(sig, mult uint8, floatChanged bool) {
   236  	enc.sigTracker.WriteIntSig(enc.os, sig)
   237  
   238  	if mult > enc.maxMult {
   239  		enc.os.WriteBit(opcodeUpdateMult)
   240  		enc.os.WriteBits(uint64(mult), numMultBits)
   241  		enc.maxMult = mult
   242  	} else if enc.sigTracker.NumSig == sig && enc.maxMult == mult && floatChanged {
   243  		// If only the float mode has changed, update the Mult regardless
   244  		// so that we can support the annotation peek
   245  		enc.os.WriteBit(opcodeUpdateMult)
   246  		enc.os.WriteBits(uint64(enc.maxMult), numMultBits)
   247  	} else {
   248  		enc.os.WriteBit(opcodeNoUpdateMult)
   249  	}
   250  }
   251  
   252  func (enc *encoder) newBuffer(capacity int) checked.Bytes {
   253  	if bytesPool := enc.opts.BytesPool(); bytesPool != nil {
   254  		return bytesPool.Get(capacity)
   255  	}
   256  	return checked.NewBytes(make([]byte, 0, capacity), nil)
   257  }
   258  
   259  // Reset resets the encoder for reuse.
   260  func (enc *encoder) Reset(
   261  	start xtime.UnixNano,
   262  	capacity int,
   263  	schema namespace.SchemaDescr,
   264  ) {
   265  	enc.reset(start, enc.newBuffer(capacity))
   266  }
   267  
   268  func (enc *encoder) reset(start xtime.UnixNano, bytes checked.Bytes) {
   269  	enc.os.Reset(bytes)
   270  
   271  	timeUnit := initialTimeUnit(start, enc.opts.DefaultTimeUnit())
   272  	enc.tsEncoderState = NewTimestampEncoder(start, timeUnit, enc.opts)
   273  
   274  	enc.floatEnc = FloatEncoderAndIterator{}
   275  	enc.intVal = 0
   276  	enc.isFloat = false
   277  	enc.maxMult = 0
   278  	enc.sigTracker = IntSigBitsTracker{}
   279  	enc.ant = nil
   280  	enc.numEncoded = 0
   281  	enc.closed = false
   282  }
   283  
   284  // Stream returns a copy of the underlying data stream.
   285  func (enc *encoder) Stream(ctx context.Context) (xio.SegmentReader, bool) {
   286  	segment := enc.segmentZeroCopy(ctx)
   287  	if segment.Len() == 0 {
   288  		return nil, false
   289  	}
   290  
   291  	if readerPool := enc.opts.SegmentReaderPool(); readerPool != nil {
   292  		reader := readerPool.Get()
   293  		reader.Reset(segment)
   294  		return reader, true
   295  	}
   296  	return xio.NewSegmentReader(segment), true
   297  }
   298  
   299  // NumEncoded returns the number of encoded datapoints.
   300  func (enc *encoder) NumEncoded() int {
   301  	return int(enc.numEncoded)
   302  }
   303  
   304  // LastEncoded returns the last encoded datapoint.
   305  func (enc *encoder) LastEncoded() (ts.Datapoint, error) {
   306  	if enc.numEncoded == 0 {
   307  		return ts.Datapoint{}, errNoEncodedDatapoints
   308  	}
   309  
   310  	result := ts.Datapoint{
   311  		TimestampNanos: enc.tsEncoderState.PrevTime,
   312  	}
   313  	if enc.isFloat {
   314  		result.Value = math.Float64frombits(enc.floatEnc.PrevFloatBits)
   315  	} else {
   316  		result.Value = enc.intVal
   317  	}
   318  	return result, nil
   319  }
   320  
   321  func (enc *encoder) LastAnnotationChecksum() (uint64, error) {
   322  	if enc.numEncoded == 0 {
   323  		return 0, errNoEncodedDatapoints
   324  	}
   325  
   326  	return enc.tsEncoderState.PrevAnnotationChecksum, nil
   327  }
   328  
   329  // Empty returns true when underlying stream is empty.
   330  func (enc *encoder) Empty() bool {
   331  	return enc.os.Empty()
   332  }
   333  
   334  // Len returns the length of the final data stream that would be generated
   335  // by a call to Stream().
   336  func (enc *encoder) Len() int {
   337  	raw, pos := enc.os.RawBytes()
   338  	if len(raw) == 0 {
   339  		return 0
   340  	}
   341  
   342  	// Calculate how long the stream would be once it was "capped" with a tail.
   343  	var (
   344  		lastIdx  = len(raw) - 1
   345  		lastByte = raw[lastIdx]
   346  		scheme   = enc.markerEncodingScheme
   347  		tail     = scheme.Tail(lastByte, pos)
   348  	)
   349  	tail.IncRef()
   350  	tailLen := tail.Len()
   351  	tail.DecRef()
   352  
   353  	return len(raw[:lastIdx]) + tailLen
   354  }
   355  
   356  // Close closes the encoder.
   357  func (enc *encoder) Close() {
   358  	if enc.closed {
   359  		return
   360  	}
   361  
   362  	enc.closed = true
   363  
   364  	// Ensure to free ref to ostream bytes
   365  	enc.os.Reset(nil)
   366  
   367  	if pool := enc.opts.EncoderPool(); pool != nil {
   368  		pool.Put(enc)
   369  	}
   370  }
   371  
   372  // Discard closes the encoder and transfers ownership of the data stream to
   373  // the caller.
   374  func (enc *encoder) Discard() ts.Segment {
   375  	segment := enc.segmentTakeOwnership()
   376  
   377  	// Close the encoder no longer needed
   378  	enc.Close()
   379  
   380  	return segment
   381  }
   382  
   383  // DiscardReset does the same thing as Discard except it does not close the encoder but resets it for reuse.
   384  func (enc *encoder) DiscardReset(
   385  	start xtime.UnixNano,
   386  	capacity int,
   387  	descr namespace.SchemaDescr,
   388  ) ts.Segment {
   389  	segment := enc.segmentTakeOwnership()
   390  	enc.Reset(start, capacity, descr)
   391  	return segment
   392  }
   393  
   394  func (enc *encoder) segmentZeroCopy(ctx context.Context) ts.Segment {
   395  	length := enc.os.Len()
   396  	if length == 0 {
   397  		return ts.Segment{}
   398  	}
   399  
   400  	// We need a multibyte tail to capture an immutable snapshot
   401  	// of the encoder data.
   402  	rawBuffer, pos := enc.os.RawBytes()
   403  	lastByte := rawBuffer[length-1]
   404  
   405  	// Take ref up to last byte.
   406  	headBytes := rawBuffer[:length-1]
   407  
   408  	// Zero copy from the output stream.
   409  	var head checked.Bytes
   410  	if pool := enc.opts.CheckedBytesWrapperPool(); pool != nil {
   411  		head = pool.Get(headBytes)
   412  	} else {
   413  		head = checked.NewBytes(headBytes, nil)
   414  	}
   415  
   416  	// Make sure the ostream bytes ref is delayed from finalizing
   417  	// until this operation is complete (since this is zero copy).
   418  	buffer, _ := enc.os.CheckedBytes()
   419  	ctx.RegisterCloser(buffer.DelayFinalizer())
   420  
   421  	// Take a shared ref to a known good tail.
   422  	scheme := enc.markerEncodingScheme
   423  	tail := scheme.Tail(lastByte, pos)
   424  
   425  	// NB(r): Finalize the head bytes whether this is by ref or copy. If by
   426  	// ref we have no ref to it anymore and if by copy then the owner should
   427  	// be finalizing the bytes when the segment is finalized.
   428  	return ts.NewSegment(head, tail, 0, ts.FinalizeHead)
   429  }
   430  
   431  func (enc *encoder) segmentTakeOwnership() ts.Segment {
   432  	length := enc.os.Len()
   433  	if length == 0 {
   434  		return ts.Segment{}
   435  	}
   436  
   437  	// We need a multibyte tail since the tail isn't set correctly midstream.
   438  	rawBuffer, pos := enc.os.RawBytes()
   439  	lastByte := rawBuffer[length-1]
   440  
   441  	// Take ref from the ostream.
   442  	head := enc.os.Discard()
   443  
   444  	// Resize to crop out last byte.
   445  	head.IncRef()
   446  	head.Resize(length - 1)
   447  	head.DecRef()
   448  
   449  	// Take a shared ref to a known good tail.
   450  	scheme := enc.markerEncodingScheme
   451  	tail := scheme.Tail(lastByte, pos)
   452  
   453  	// NB(r): Finalize the head bytes whether this is by ref or copy. If by
   454  	// ref we have no ref to it anymore and if by copy then the owner should
   455  	// be finalizing the bytes when the segment is finalized.
   456  	return ts.NewSegment(head, tail, 0, ts.FinalizeHead)
   457  }