github.com/m3db/m3@v1.5.0/src/dbnode/encoding/m3tsz/encoder.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package m3tsz 22 23 import ( 24 "errors" 25 "math" 26 27 "github.com/m3db/m3/src/dbnode/encoding" 28 "github.com/m3db/m3/src/dbnode/namespace" 29 "github.com/m3db/m3/src/dbnode/ts" 30 "github.com/m3db/m3/src/dbnode/x/xio" 31 "github.com/m3db/m3/src/x/checked" 32 "github.com/m3db/m3/src/x/context" 33 xtime "github.com/m3db/m3/src/x/time" 34 ) 35 36 var ( 37 errEncoderClosed = errors.New("encoder is closed") 38 errNoEncodedDatapoints = errors.New("encoder has no encoded datapoints") 39 ) 40 41 // encoder is an M3TSZ encoder that can encode a stream of data in M3TSZ format. 42 type encoder struct { 43 os encoding.OStream 44 opts encoding.Options 45 markerEncodingScheme *encoding.MarkerEncodingScheme 46 47 // internal bookkeeping 48 tsEncoderState TimestampEncoder 49 floatEnc FloatEncoderAndIterator 50 sigTracker IntSigBitsTracker 51 52 ant ts.Annotation // current annotation 53 54 intVal float64 // current int val 55 numEncoded uint32 // whether any datapoints have been written yet 56 maxMult uint8 // current max multiplier for int vals 57 58 intOptimized bool // whether the encoding scheme is optimized for ints 59 isFloat bool // whether we are encoding ints/floats 60 closed bool 61 } 62 63 // NewEncoder creates a new encoder. 64 func NewEncoder( 65 start xtime.UnixNano, 66 bytes checked.Bytes, 67 intOptimized bool, 68 opts encoding.Options, 69 ) encoding.Encoder { 70 if opts == nil { 71 opts = encoding.NewOptions() 72 } 73 // NB(r): only perform an initial allocation if there is no pool that 74 // will be used for this encoder. If a pool is being used alloc when the 75 // `Reset` method is called. 76 initAllocIfEmpty := opts.EncoderPool() == nil 77 return &encoder{ 78 os: encoding.NewOStream(bytes, initAllocIfEmpty, opts.BytesPool()), 79 opts: opts, 80 markerEncodingScheme: opts.MarkerEncodingScheme(), 81 tsEncoderState: NewTimestampEncoder(start, opts.DefaultTimeUnit(), opts), 82 closed: false, 83 intOptimized: intOptimized, 84 } 85 } 86 87 func (enc *encoder) SetSchema(descr namespace.SchemaDescr) {} 88 89 // Encode encodes the timestamp and the value of a datapoint. 90 func (enc *encoder) Encode(dp ts.Datapoint, tu xtime.Unit, ant ts.Annotation) error { 91 if enc.closed { 92 return errEncoderClosed 93 } 94 95 err := enc.tsEncoderState.WriteTime(enc.os, dp.TimestampNanos, ant, tu) 96 if err != nil { 97 return err 98 } 99 100 if enc.numEncoded == 0 { 101 err = enc.writeFirstValue(dp.Value) 102 } else { 103 err = enc.writeNextValue(dp.Value) 104 } 105 if err == nil { 106 enc.numEncoded++ 107 } 108 109 return err 110 } 111 112 func (enc *encoder) writeFirstValue(v float64) error { 113 if !enc.intOptimized { 114 enc.floatEnc.writeFullFloat(enc.os, math.Float64bits(v)) 115 return nil 116 } 117 118 // Attempt to convert float to int for int optimization 119 val, mult, isFloat, err := convertToIntFloat(v, 0) 120 if err != nil { 121 return err 122 } 123 124 if isFloat { 125 enc.os.WriteBit(opcodeFloatMode) 126 enc.floatEnc.writeFullFloat(enc.os, math.Float64bits(v)) 127 enc.isFloat = true 128 enc.maxMult = mult 129 return nil 130 } 131 132 // val can be converted to int 133 enc.os.WriteBit(opcodeIntMode) 134 enc.intVal = val 135 negDiff := true 136 if val < 0 { 137 negDiff = false 138 val = -1 * val 139 } 140 141 valBits := uint64(int64(val)) 142 numSig := encoding.NumSig(valBits) 143 enc.writeIntSigMult(numSig, mult, false) 144 enc.sigTracker.WriteIntValDiff(enc.os, valBits, negDiff) 145 return nil 146 } 147 148 func (enc *encoder) writeNextValue(v float64) error { 149 if !enc.intOptimized { 150 enc.floatEnc.writeNextFloat(enc.os, math.Float64bits(v)) 151 return nil 152 } 153 154 // Attempt to convert float to int for int optimization 155 val, mult, isFloat, err := convertToIntFloat(v, enc.maxMult) 156 if err != nil { 157 return err 158 } 159 160 var valDiff float64 161 if !isFloat { 162 valDiff = enc.intVal - val 163 } 164 165 if isFloat || valDiff >= maxInt || valDiff <= minInt { 166 enc.writeFloatVal(math.Float64bits(val), mult) 167 return nil 168 } 169 170 enc.writeIntVal(val, mult, isFloat, valDiff) 171 return nil 172 } 173 174 // writeFloatVal writes the value as XOR of the 175 // bits that represent the float 176 func (enc *encoder) writeFloatVal(val uint64, mult uint8) { 177 if !enc.isFloat { 178 // Converting from int to float 179 enc.os.WriteBit(opcodeUpdate) 180 enc.os.WriteBit(opcodeNoRepeat) 181 enc.os.WriteBit(opcodeFloatMode) 182 enc.floatEnc.writeFullFloat(enc.os, val) 183 enc.isFloat = true 184 enc.maxMult = mult 185 return 186 } 187 188 if val == enc.floatEnc.PrevFloatBits { 189 // Value is repeated 190 enc.os.WriteBit(opcodeUpdate) 191 enc.os.WriteBit(opcodeRepeat) 192 return 193 } 194 195 enc.os.WriteBit(opcodeNoUpdate) 196 enc.floatEnc.writeNextFloat(enc.os, val) 197 } 198 199 // writeIntVal writes the val as a diff of ints 200 func (enc *encoder) writeIntVal(val float64, mult uint8, isFloat bool, valDiff float64) { 201 if valDiff == 0 && isFloat == enc.isFloat && mult == enc.maxMult { 202 // Value is repeated 203 enc.os.WriteBit(opcodeUpdate) 204 enc.os.WriteBit(opcodeRepeat) 205 return 206 } 207 208 neg := false 209 if valDiff < 0 { 210 neg = true 211 valDiff = -1 * valDiff 212 } 213 214 valDiffBits := uint64(int64(valDiff)) 215 numSig := encoding.NumSig(valDiffBits) 216 newSig := enc.sigTracker.TrackNewSig(numSig) 217 isFloatChanged := isFloat != enc.isFloat 218 if mult > enc.maxMult || enc.sigTracker.NumSig != newSig || isFloatChanged { 219 enc.os.WriteBit(opcodeUpdate) 220 enc.os.WriteBit(opcodeNoRepeat) 221 enc.os.WriteBit(opcodeIntMode) 222 enc.writeIntSigMult(newSig, mult, isFloatChanged) 223 enc.sigTracker.WriteIntValDiff(enc.os, valDiffBits, neg) 224 enc.isFloat = false 225 } else { 226 enc.os.WriteBit(opcodeNoUpdate) 227 enc.sigTracker.WriteIntValDiff(enc.os, valDiffBits, neg) 228 } 229 230 enc.intVal = val 231 } 232 233 // writeIntSigMult writes the number of significant 234 // bits of the diff and the multiplier if they have changed 235 func (enc *encoder) writeIntSigMult(sig, mult uint8, floatChanged bool) { 236 enc.sigTracker.WriteIntSig(enc.os, sig) 237 238 if mult > enc.maxMult { 239 enc.os.WriteBit(opcodeUpdateMult) 240 enc.os.WriteBits(uint64(mult), numMultBits) 241 enc.maxMult = mult 242 } else if enc.sigTracker.NumSig == sig && enc.maxMult == mult && floatChanged { 243 // If only the float mode has changed, update the Mult regardless 244 // so that we can support the annotation peek 245 enc.os.WriteBit(opcodeUpdateMult) 246 enc.os.WriteBits(uint64(enc.maxMult), numMultBits) 247 } else { 248 enc.os.WriteBit(opcodeNoUpdateMult) 249 } 250 } 251 252 func (enc *encoder) newBuffer(capacity int) checked.Bytes { 253 if bytesPool := enc.opts.BytesPool(); bytesPool != nil { 254 return bytesPool.Get(capacity) 255 } 256 return checked.NewBytes(make([]byte, 0, capacity), nil) 257 } 258 259 // Reset resets the encoder for reuse. 260 func (enc *encoder) Reset( 261 start xtime.UnixNano, 262 capacity int, 263 schema namespace.SchemaDescr, 264 ) { 265 enc.reset(start, enc.newBuffer(capacity)) 266 } 267 268 func (enc *encoder) reset(start xtime.UnixNano, bytes checked.Bytes) { 269 enc.os.Reset(bytes) 270 271 timeUnit := initialTimeUnit(start, enc.opts.DefaultTimeUnit()) 272 enc.tsEncoderState = NewTimestampEncoder(start, timeUnit, enc.opts) 273 274 enc.floatEnc = FloatEncoderAndIterator{} 275 enc.intVal = 0 276 enc.isFloat = false 277 enc.maxMult = 0 278 enc.sigTracker = IntSigBitsTracker{} 279 enc.ant = nil 280 enc.numEncoded = 0 281 enc.closed = false 282 } 283 284 // Stream returns a copy of the underlying data stream. 285 func (enc *encoder) Stream(ctx context.Context) (xio.SegmentReader, bool) { 286 segment := enc.segmentZeroCopy(ctx) 287 if segment.Len() == 0 { 288 return nil, false 289 } 290 291 if readerPool := enc.opts.SegmentReaderPool(); readerPool != nil { 292 reader := readerPool.Get() 293 reader.Reset(segment) 294 return reader, true 295 } 296 return xio.NewSegmentReader(segment), true 297 } 298 299 // NumEncoded returns the number of encoded datapoints. 300 func (enc *encoder) NumEncoded() int { 301 return int(enc.numEncoded) 302 } 303 304 // LastEncoded returns the last encoded datapoint. 305 func (enc *encoder) LastEncoded() (ts.Datapoint, error) { 306 if enc.numEncoded == 0 { 307 return ts.Datapoint{}, errNoEncodedDatapoints 308 } 309 310 result := ts.Datapoint{ 311 TimestampNanos: enc.tsEncoderState.PrevTime, 312 } 313 if enc.isFloat { 314 result.Value = math.Float64frombits(enc.floatEnc.PrevFloatBits) 315 } else { 316 result.Value = enc.intVal 317 } 318 return result, nil 319 } 320 321 func (enc *encoder) LastAnnotationChecksum() (uint64, error) { 322 if enc.numEncoded == 0 { 323 return 0, errNoEncodedDatapoints 324 } 325 326 return enc.tsEncoderState.PrevAnnotationChecksum, nil 327 } 328 329 // Empty returns true when underlying stream is empty. 330 func (enc *encoder) Empty() bool { 331 return enc.os.Empty() 332 } 333 334 // Len returns the length of the final data stream that would be generated 335 // by a call to Stream(). 336 func (enc *encoder) Len() int { 337 raw, pos := enc.os.RawBytes() 338 if len(raw) == 0 { 339 return 0 340 } 341 342 // Calculate how long the stream would be once it was "capped" with a tail. 343 var ( 344 lastIdx = len(raw) - 1 345 lastByte = raw[lastIdx] 346 scheme = enc.markerEncodingScheme 347 tail = scheme.Tail(lastByte, pos) 348 ) 349 tail.IncRef() 350 tailLen := tail.Len() 351 tail.DecRef() 352 353 return len(raw[:lastIdx]) + tailLen 354 } 355 356 // Close closes the encoder. 357 func (enc *encoder) Close() { 358 if enc.closed { 359 return 360 } 361 362 enc.closed = true 363 364 // Ensure to free ref to ostream bytes 365 enc.os.Reset(nil) 366 367 if pool := enc.opts.EncoderPool(); pool != nil { 368 pool.Put(enc) 369 } 370 } 371 372 // Discard closes the encoder and transfers ownership of the data stream to 373 // the caller. 374 func (enc *encoder) Discard() ts.Segment { 375 segment := enc.segmentTakeOwnership() 376 377 // Close the encoder no longer needed 378 enc.Close() 379 380 return segment 381 } 382 383 // DiscardReset does the same thing as Discard except it does not close the encoder but resets it for reuse. 384 func (enc *encoder) DiscardReset( 385 start xtime.UnixNano, 386 capacity int, 387 descr namespace.SchemaDescr, 388 ) ts.Segment { 389 segment := enc.segmentTakeOwnership() 390 enc.Reset(start, capacity, descr) 391 return segment 392 } 393 394 func (enc *encoder) segmentZeroCopy(ctx context.Context) ts.Segment { 395 length := enc.os.Len() 396 if length == 0 { 397 return ts.Segment{} 398 } 399 400 // We need a multibyte tail to capture an immutable snapshot 401 // of the encoder data. 402 rawBuffer, pos := enc.os.RawBytes() 403 lastByte := rawBuffer[length-1] 404 405 // Take ref up to last byte. 406 headBytes := rawBuffer[:length-1] 407 408 // Zero copy from the output stream. 409 var head checked.Bytes 410 if pool := enc.opts.CheckedBytesWrapperPool(); pool != nil { 411 head = pool.Get(headBytes) 412 } else { 413 head = checked.NewBytes(headBytes, nil) 414 } 415 416 // Make sure the ostream bytes ref is delayed from finalizing 417 // until this operation is complete (since this is zero copy). 418 buffer, _ := enc.os.CheckedBytes() 419 ctx.RegisterCloser(buffer.DelayFinalizer()) 420 421 // Take a shared ref to a known good tail. 422 scheme := enc.markerEncodingScheme 423 tail := scheme.Tail(lastByte, pos) 424 425 // NB(r): Finalize the head bytes whether this is by ref or copy. If by 426 // ref we have no ref to it anymore and if by copy then the owner should 427 // be finalizing the bytes when the segment is finalized. 428 return ts.NewSegment(head, tail, 0, ts.FinalizeHead) 429 } 430 431 func (enc *encoder) segmentTakeOwnership() ts.Segment { 432 length := enc.os.Len() 433 if length == 0 { 434 return ts.Segment{} 435 } 436 437 // We need a multibyte tail since the tail isn't set correctly midstream. 438 rawBuffer, pos := enc.os.RawBytes() 439 lastByte := rawBuffer[length-1] 440 441 // Take ref from the ostream. 442 head := enc.os.Discard() 443 444 // Resize to crop out last byte. 445 head.IncRef() 446 head.Resize(length - 1) 447 head.DecRef() 448 449 // Take a shared ref to a known good tail. 450 scheme := enc.markerEncodingScheme 451 tail := scheme.Tail(lastByte, pos) 452 453 // NB(r): Finalize the head bytes whether this is by ref or copy. If by 454 // ref we have no ref to it anymore and if by copy then the owner should 455 // be finalizing the bytes when the segment is finalized. 456 return ts.NewSegment(head, tail, 0, ts.FinalizeHead) 457 }