// github.com/m3db/m3@v1.5.0/src/dbnode/encoding/m3tsz/timestamp_encoder.go (about)

// Copyright (c) 2019 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

20 21 package m3tsz 22 23 import ( 24 "encoding/binary" 25 "fmt" 26 "time" 27 28 "github.com/cespare/xxhash/v2" 29 30 "github.com/m3db/m3/src/dbnode/encoding" 31 "github.com/m3db/m3/src/dbnode/ts" 32 xtime "github.com/m3db/m3/src/x/time" 33 ) 34 35 // TimestampEncoder encapsulates the state required for a logical stream of 36 // bits that represent a stream of timestamps compressed using delta-of-delta 37 type TimestampEncoder struct { 38 PrevTime xtime.UnixNano 39 PrevTimeDelta time.Duration 40 PrevAnnotationChecksum uint64 41 42 TimeUnit xtime.Unit 43 44 markerEncodingScheme *encoding.MarkerEncodingScheme 45 timeEncodingSchemes encoding.TimeEncodingSchemes 46 47 // Used to keep track of time unit changes that occur directly via the WriteTimeUnit() 48 // API as opposed to indirectly via the WriteTime() API. 49 timeUnitEncodedManually bool 50 // Only taken into account if using the WriteTime() API. 51 hasWrittenFirst bool 52 53 metrics encoding.TimestampEncoderMetrics 54 } 55 56 var emptyAnnotationChecksum = xxhash.Sum64(nil) 57 58 // NewTimestampEncoder creates a new TimestampEncoder. 59 func NewTimestampEncoder( 60 start xtime.UnixNano, timeUnit xtime.Unit, opts encoding.Options) TimestampEncoder { 61 return TimestampEncoder{ 62 PrevTime: start, 63 TimeUnit: initialTimeUnit(start, timeUnit), 64 PrevAnnotationChecksum: emptyAnnotationChecksum, 65 markerEncodingScheme: opts.MarkerEncodingScheme(), 66 timeEncodingSchemes: opts.TimeEncodingSchemes(), 67 metrics: opts.Metrics().TimestampEncoder, 68 } 69 } 70 71 // WriteTime encode the timestamp using delta-of-delta compression. 
72 func (enc *TimestampEncoder) WriteTime( 73 stream encoding.OStream, 74 currTime xtime.UnixNano, 75 ant ts.Annotation, 76 timeUnit xtime.Unit, 77 ) error { 78 if !enc.hasWrittenFirst { 79 if err := enc.WriteFirstTime(stream, currTime, ant, timeUnit); err != nil { 80 return err 81 } 82 enc.hasWrittenFirst = true 83 return nil 84 } 85 86 return enc.WriteNextTime(stream, currTime, ant, timeUnit) 87 } 88 89 // WriteFirstTime encodes the first timestamp. 90 func (enc *TimestampEncoder) WriteFirstTime( 91 stream encoding.OStream, 92 currTime xtime.UnixNano, 93 ant ts.Annotation, 94 timeUnit xtime.Unit, 95 ) error { 96 // NB(xichen): Always write the first time in nanoseconds because we don't know 97 // if the start time is going to be a multiple of the time unit provided. 98 nt := enc.PrevTime 99 stream.WriteBits(uint64(nt), 64) 100 return enc.WriteNextTime(stream, currTime, ant, timeUnit) 101 } 102 103 // WriteNextTime encodes the next (non-first) timestamp. 104 func (enc *TimestampEncoder) WriteNextTime( 105 stream encoding.OStream, 106 currTime xtime.UnixNano, 107 ant ts.Annotation, 108 timeUnit xtime.Unit, 109 ) error { 110 enc.writeAnnotation(stream, ant) 111 tuChanged := enc.maybeWriteTimeUnitChange(stream, timeUnit) 112 113 timeDelta := currTime.Sub(enc.PrevTime) 114 enc.PrevTime = currTime 115 if tuChanged || enc.timeUnitEncodedManually { 116 enc.writeDeltaOfDeltaTimeUnitChanged(stream, enc.PrevTimeDelta, timeDelta) 117 // NB(xichen): if the time unit has changed, we reset the time delta to zero 118 // because we can't guarantee that dt is a multiple of the new time unit, which 119 // means we can't guarantee that the delta of delta when encoding the next 120 // data point is a multiple of the new time unit. 
121 enc.PrevTimeDelta = 0 122 enc.timeUnitEncodedManually = false 123 return nil 124 } 125 err := enc.writeDeltaOfDeltaTimeUnitUnchanged( 126 stream, enc.PrevTimeDelta, timeDelta, timeUnit) 127 enc.PrevTimeDelta = timeDelta 128 return err 129 } 130 131 // WriteTimeUnit writes the new time unit into the stream. It exists as a standalone method 132 // so that other calls can encode time unit changes without relying on the marker scheme. 133 func (enc *TimestampEncoder) WriteTimeUnit(stream encoding.OStream, timeUnit xtime.Unit) { 134 stream.WriteByte(byte(timeUnit)) 135 enc.TimeUnit = timeUnit 136 enc.timeUnitEncodedManually = true 137 } 138 139 // maybeWriteTimeUnitChange encodes the time unit and returns true if the time unit has 140 // changed, and false otherwise. 141 func (enc *TimestampEncoder) maybeWriteTimeUnitChange(stream encoding.OStream, timeUnit xtime.Unit) bool { 142 if !enc.shouldWriteTimeUnit(timeUnit) { 143 return false 144 } 145 146 scheme := enc.markerEncodingScheme 147 encoding.WriteSpecialMarker(stream, scheme, scheme.TimeUnit()) 148 enc.WriteTimeUnit(stream, timeUnit) 149 return true 150 } 151 152 // shouldWriteTimeUnit determines whether we should write tu as a time unit. 153 // Returns true if tu is valid and differs from the existing time unit, false otherwise. 154 func (enc *TimestampEncoder) shouldWriteTimeUnit(timeUnit xtime.Unit) bool { 155 if !timeUnit.IsValid() || timeUnit == enc.TimeUnit { 156 return false 157 } 158 return true 159 } 160 161 // shouldWriteAnnotation determines whether we should write ant as an annotation. 162 // Returns true if ant is not empty and differs from the existing annotation, false otherwise. 163 // Also returns the checksum of the given annotation. 
164 func (enc *TimestampEncoder) shouldWriteAnnotation(ant ts.Annotation) (bool, uint64) { 165 if len(ant) == 0 { 166 return false, emptyAnnotationChecksum 167 } 168 checksum := xxhash.Sum64(ant) 169 return checksum != enc.PrevAnnotationChecksum, checksum 170 } 171 172 func (enc *TimestampEncoder) writeAnnotation(stream encoding.OStream, ant ts.Annotation) { 173 shouldWrite, checksum := enc.shouldWriteAnnotation(ant) 174 if !shouldWrite { 175 return 176 } 177 178 scheme := enc.markerEncodingScheme 179 encoding.WriteSpecialMarker(stream, scheme, scheme.Annotation()) 180 181 var buf [binary.MaxVarintLen32]byte 182 // NB: we subtract 1 for possible varint encoding savings 183 annotationLength := binary.PutVarint(buf[:], int64(len(ant)-1)) 184 185 stream.WriteBytes(buf[:annotationLength]) 186 stream.WriteBytes(ant) 187 188 if enc.PrevAnnotationChecksum != emptyAnnotationChecksum { 189 // NB: current assumption is that each time series should have a single annotation write per block 190 // and that annotations should be rewritten rarely. If this assumption changes, it might not be worth 191 // keeping this metric around. 192 enc.metrics.IncAnnotationRewritten() 193 } 194 enc.PrevAnnotationChecksum = checksum 195 } 196 197 func (enc *TimestampEncoder) writeDeltaOfDeltaTimeUnitChanged( 198 stream encoding.OStream, prevDelta, curDelta time.Duration) { 199 // NB(xichen): if the time unit has changed, always normalize delta-of-delta 200 // to nanoseconds and encode it using 64 bits. 
201 dodInNano := int64(curDelta - prevDelta) 202 stream.WriteBits(uint64(dodInNano), 64) 203 } 204 205 func (enc *TimestampEncoder) writeDeltaOfDeltaTimeUnitUnchanged( 206 stream encoding.OStream, prevDelta, curDelta time.Duration, timeUnit xtime.Unit) error { 207 u, err := timeUnit.Value() 208 if err != nil { 209 return err 210 } 211 212 deltaOfDelta := xtime.ToNormalizedDuration(curDelta-prevDelta, u) 213 if timeUnit == xtime.Millisecond || timeUnit == xtime.Second { 214 // Only milliseconds and seconds are encoded using 215 // up to 32 bits (see defaultTimeEncodingSchemes). 216 dod32 := int32(deltaOfDelta) 217 if int64(dod32) != deltaOfDelta { 218 return fmt.Errorf( 219 "deltaOfDelta value %d %s overflows 32 bits", deltaOfDelta, timeUnit) 220 } 221 } 222 223 tes, exists := enc.timeEncodingSchemes.SchemeForUnit(timeUnit) 224 if !exists { 225 return errNoTimeSchemaForUnit 226 } 227 228 if deltaOfDelta == 0 { 229 zeroBucket := tes.ZeroBucket() 230 stream.WriteBits(zeroBucket.Opcode(), zeroBucket.NumOpcodeBits()) 231 return nil 232 } 233 234 buckets := tes.Buckets() 235 for i := 0; i < len(buckets); i++ { 236 if deltaOfDelta >= buckets[i].Min() && deltaOfDelta <= buckets[i].Max() { 237 stream.WriteBits(buckets[i].Opcode(), buckets[i].NumOpcodeBits()) 238 stream.WriteBits(uint64(deltaOfDelta), buckets[i].NumValueBits()) 239 return nil 240 } 241 } 242 defaultBucket := tes.DefaultBucket() 243 stream.WriteBits(defaultBucket.Opcode(), defaultBucket.NumOpcodeBits()) 244 stream.WriteBits(uint64(deltaOfDelta), defaultBucket.NumValueBits()) 245 return nil 246 } 247 248 func initialTimeUnit(start xtime.UnixNano, tu xtime.Unit) xtime.Unit { 249 tv, err := tu.Value() 250 if err != nil { 251 return xtime.None 252 } 253 // If we want to use tu as the time unit for start, start must 254 // be a multiple of tu. 255 if start%xtime.UnixNano(tv) == 0 { 256 return tu 257 } 258 return xtime.None 259 }