github.com/m3db/m3@v1.5.0/src/dbnode/encoding/m3tsz/timestamp_encoder.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package m3tsz
    22  
    23  import (
    24  	"encoding/binary"
    25  	"fmt"
    26  	"time"
    27  
    28  	"github.com/cespare/xxhash/v2"
    29  
    30  	"github.com/m3db/m3/src/dbnode/encoding"
    31  	"github.com/m3db/m3/src/dbnode/ts"
    32  	xtime "github.com/m3db/m3/src/x/time"
    33  )
    34  
    35  // TimestampEncoder encapsulates the state required for a logical stream of
    36  // bits that represent a stream of timestamps compressed using delta-of-delta
    37  type TimestampEncoder struct {
    38  	PrevTime               xtime.UnixNano
    39  	PrevTimeDelta          time.Duration
    40  	PrevAnnotationChecksum uint64
    41  
    42  	TimeUnit xtime.Unit
    43  
    44  	markerEncodingScheme *encoding.MarkerEncodingScheme
    45  	timeEncodingSchemes  encoding.TimeEncodingSchemes
    46  
    47  	// Used to keep track of time unit changes that occur directly via the WriteTimeUnit()
    48  	// API as opposed to indirectly via the WriteTime() API.
    49  	timeUnitEncodedManually bool
    50  	// Only taken into account if using the WriteTime() API.
    51  	hasWrittenFirst bool
    52  
    53  	metrics encoding.TimestampEncoderMetrics
    54  }
    55  
// emptyAnnotationChecksum is the xxhash checksum of a nil annotation. A new
// TimestampEncoder starts with it as PrevAnnotationChecksum, and it is the
// checksum reported for empty annotations.
var emptyAnnotationChecksum = xxhash.Sum64(nil)
    57  
    58  // NewTimestampEncoder creates a new TimestampEncoder.
    59  func NewTimestampEncoder(
    60  	start xtime.UnixNano, timeUnit xtime.Unit, opts encoding.Options) TimestampEncoder {
    61  	return TimestampEncoder{
    62  		PrevTime:               start,
    63  		TimeUnit:               initialTimeUnit(start, timeUnit),
    64  		PrevAnnotationChecksum: emptyAnnotationChecksum,
    65  		markerEncodingScheme:   opts.MarkerEncodingScheme(),
    66  		timeEncodingSchemes:    opts.TimeEncodingSchemes(),
    67  		metrics:                opts.Metrics().TimestampEncoder,
    68  	}
    69  }
    70  
    71  // WriteTime encode the timestamp using delta-of-delta compression.
    72  func (enc *TimestampEncoder) WriteTime(
    73  	stream encoding.OStream,
    74  	currTime xtime.UnixNano,
    75  	ant ts.Annotation,
    76  	timeUnit xtime.Unit,
    77  ) error {
    78  	if !enc.hasWrittenFirst {
    79  		if err := enc.WriteFirstTime(stream, currTime, ant, timeUnit); err != nil {
    80  			return err
    81  		}
    82  		enc.hasWrittenFirst = true
    83  		return nil
    84  	}
    85  
    86  	return enc.WriteNextTime(stream, currTime, ant, timeUnit)
    87  }
    88  
    89  // WriteFirstTime encodes the first timestamp.
    90  func (enc *TimestampEncoder) WriteFirstTime(
    91  	stream encoding.OStream,
    92  	currTime xtime.UnixNano,
    93  	ant ts.Annotation,
    94  	timeUnit xtime.Unit,
    95  ) error {
    96  	// NB(xichen): Always write the first time in nanoseconds because we don't know
    97  	// if the start time is going to be a multiple of the time unit provided.
    98  	nt := enc.PrevTime
    99  	stream.WriteBits(uint64(nt), 64)
   100  	return enc.WriteNextTime(stream, currTime, ant, timeUnit)
   101  }
   102  
   103  // WriteNextTime encodes the next (non-first) timestamp.
   104  func (enc *TimestampEncoder) WriteNextTime(
   105  	stream encoding.OStream,
   106  	currTime xtime.UnixNano,
   107  	ant ts.Annotation,
   108  	timeUnit xtime.Unit,
   109  ) error {
   110  	enc.writeAnnotation(stream, ant)
   111  	tuChanged := enc.maybeWriteTimeUnitChange(stream, timeUnit)
   112  
   113  	timeDelta := currTime.Sub(enc.PrevTime)
   114  	enc.PrevTime = currTime
   115  	if tuChanged || enc.timeUnitEncodedManually {
   116  		enc.writeDeltaOfDeltaTimeUnitChanged(stream, enc.PrevTimeDelta, timeDelta)
   117  		// NB(xichen): if the time unit has changed, we reset the time delta to zero
   118  		// because we can't guarantee that dt is a multiple of the new time unit, which
   119  		// means we can't guarantee that the delta of delta when encoding the next
   120  		// data point is a multiple of the new time unit.
   121  		enc.PrevTimeDelta = 0
   122  		enc.timeUnitEncodedManually = false
   123  		return nil
   124  	}
   125  	err := enc.writeDeltaOfDeltaTimeUnitUnchanged(
   126  		stream, enc.PrevTimeDelta, timeDelta, timeUnit)
   127  	enc.PrevTimeDelta = timeDelta
   128  	return err
   129  }
   130  
   131  // WriteTimeUnit writes the new time unit into the stream. It exists as a standalone method
   132  // so that other calls can encode time unit changes without relying on the marker scheme.
   133  func (enc *TimestampEncoder) WriteTimeUnit(stream encoding.OStream, timeUnit xtime.Unit) {
   134  	stream.WriteByte(byte(timeUnit))
   135  	enc.TimeUnit = timeUnit
   136  	enc.timeUnitEncodedManually = true
   137  }
   138  
   139  // maybeWriteTimeUnitChange encodes the time unit and returns true if the time unit has
   140  // changed, and false otherwise.
   141  func (enc *TimestampEncoder) maybeWriteTimeUnitChange(stream encoding.OStream, timeUnit xtime.Unit) bool {
   142  	if !enc.shouldWriteTimeUnit(timeUnit) {
   143  		return false
   144  	}
   145  
   146  	scheme := enc.markerEncodingScheme
   147  	encoding.WriteSpecialMarker(stream, scheme, scheme.TimeUnit())
   148  	enc.WriteTimeUnit(stream, timeUnit)
   149  	return true
   150  }
   151  
   152  // shouldWriteTimeUnit determines whether we should write tu as a time unit.
   153  // Returns true if tu is valid and differs from the existing time unit, false otherwise.
   154  func (enc *TimestampEncoder) shouldWriteTimeUnit(timeUnit xtime.Unit) bool {
   155  	if !timeUnit.IsValid() || timeUnit == enc.TimeUnit {
   156  		return false
   157  	}
   158  	return true
   159  }
   160  
   161  // shouldWriteAnnotation determines whether we should write ant as an annotation.
   162  // Returns true if ant is not empty and differs from the existing annotation, false otherwise.
   163  // Also returns the checksum of the given annotation.
   164  func (enc *TimestampEncoder) shouldWriteAnnotation(ant ts.Annotation) (bool, uint64) {
   165  	if len(ant) == 0 {
   166  		return false, emptyAnnotationChecksum
   167  	}
   168  	checksum := xxhash.Sum64(ant)
   169  	return checksum != enc.PrevAnnotationChecksum, checksum
   170  }
   171  
   172  func (enc *TimestampEncoder) writeAnnotation(stream encoding.OStream, ant ts.Annotation) {
   173  	shouldWrite, checksum := enc.shouldWriteAnnotation(ant)
   174  	if !shouldWrite {
   175  		return
   176  	}
   177  
   178  	scheme := enc.markerEncodingScheme
   179  	encoding.WriteSpecialMarker(stream, scheme, scheme.Annotation())
   180  
   181  	var buf [binary.MaxVarintLen32]byte
   182  	// NB: we subtract 1 for possible varint encoding savings
   183  	annotationLength := binary.PutVarint(buf[:], int64(len(ant)-1))
   184  
   185  	stream.WriteBytes(buf[:annotationLength])
   186  	stream.WriteBytes(ant)
   187  
   188  	if enc.PrevAnnotationChecksum != emptyAnnotationChecksum {
   189  		// NB: current assumption is that each time series should have a single annotation write per block
   190  		// and that annotations should be rewritten rarely. If this assumption changes, it might not be worth
   191  		// keeping this metric around.
   192  		enc.metrics.IncAnnotationRewritten()
   193  	}
   194  	enc.PrevAnnotationChecksum = checksum
   195  }
   196  
   197  func (enc *TimestampEncoder) writeDeltaOfDeltaTimeUnitChanged(
   198  	stream encoding.OStream, prevDelta, curDelta time.Duration) {
   199  	// NB(xichen): if the time unit has changed, always normalize delta-of-delta
   200  	// to nanoseconds and encode it using 64 bits.
   201  	dodInNano := int64(curDelta - prevDelta)
   202  	stream.WriteBits(uint64(dodInNano), 64)
   203  }
   204  
   205  func (enc *TimestampEncoder) writeDeltaOfDeltaTimeUnitUnchanged(
   206  	stream encoding.OStream, prevDelta, curDelta time.Duration, timeUnit xtime.Unit) error {
   207  	u, err := timeUnit.Value()
   208  	if err != nil {
   209  		return err
   210  	}
   211  
   212  	deltaOfDelta := xtime.ToNormalizedDuration(curDelta-prevDelta, u)
   213  	if timeUnit == xtime.Millisecond || timeUnit == xtime.Second {
   214  		// Only milliseconds and seconds are encoded using
   215  		// up to 32 bits (see defaultTimeEncodingSchemes).
   216  		dod32 := int32(deltaOfDelta)
   217  		if int64(dod32) != deltaOfDelta {
   218  			return fmt.Errorf(
   219  				"deltaOfDelta value %d %s overflows 32 bits", deltaOfDelta, timeUnit)
   220  		}
   221  	}
   222  
   223  	tes, exists := enc.timeEncodingSchemes.SchemeForUnit(timeUnit)
   224  	if !exists {
   225  		return errNoTimeSchemaForUnit
   226  	}
   227  
   228  	if deltaOfDelta == 0 {
   229  		zeroBucket := tes.ZeroBucket()
   230  		stream.WriteBits(zeroBucket.Opcode(), zeroBucket.NumOpcodeBits())
   231  		return nil
   232  	}
   233  
   234  	buckets := tes.Buckets()
   235  	for i := 0; i < len(buckets); i++ {
   236  		if deltaOfDelta >= buckets[i].Min() && deltaOfDelta <= buckets[i].Max() {
   237  			stream.WriteBits(buckets[i].Opcode(), buckets[i].NumOpcodeBits())
   238  			stream.WriteBits(uint64(deltaOfDelta), buckets[i].NumValueBits())
   239  			return nil
   240  		}
   241  	}
   242  	defaultBucket := tes.DefaultBucket()
   243  	stream.WriteBits(defaultBucket.Opcode(), defaultBucket.NumOpcodeBits())
   244  	stream.WriteBits(uint64(deltaOfDelta), defaultBucket.NumValueBits())
   245  	return nil
   246  }
   247  
   248  func initialTimeUnit(start xtime.UnixNano, tu xtime.Unit) xtime.Unit {
   249  	tv, err := tu.Value()
   250  	if err != nil {
   251  		return xtime.None
   252  	}
   253  	// If we want to use tu as the time unit for start, start must
   254  	// be a multiple of tu.
   255  	if start%xtime.UnixNano(tv) == 0 {
   256  		return tu
   257  	}
   258  	return xtime.None
   259  }