github.com/m3db/m3@v1.5.0/src/dbnode/encoding/scheme.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package encoding 22 23 import ( 24 "github.com/m3db/m3/src/x/checked" 25 xtime "github.com/m3db/m3/src/x/time" 26 ) 27 28 const ( 29 // special markers 30 defaultEndOfStreamMarker Marker = iota 31 defaultAnnotationMarker 32 defaultTimeUnitMarker 33 34 // marker encoding information 35 defaultMarkerOpcode = 0x100 36 defaultNumMarkerOpcodeBits = 9 37 defaultNumMarkerValueBits = 2 38 ) 39 40 var ( 41 // default time encoding schemes 42 defaultZeroBucket = NewTimeBucket(0x0, 1, 0) 43 defaultNumValueBitsForBuckets = []int{7, 9, 12} 44 45 // TODO(xichen): set more reasonable defaults once we have more knowledge 46 // of the use cases for time units other than seconds. 47 defaultTimeEncodingSchemes = map[xtime.Unit]TimeEncodingScheme{ 48 xtime.Second: NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 32), 49 xtime.Millisecond: NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 32), 50 xtime.Microsecond: NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 64), 51 xtime.Nanosecond: NewTimeEncodingScheme(defaultNumValueBitsForBuckets, 64), 52 } 53 54 // default marker encoding scheme 55 defaultMarkerEncodingScheme = NewMarkerEncodingScheme( 56 defaultMarkerOpcode, 57 defaultNumMarkerOpcodeBits, 58 defaultNumMarkerValueBits, 59 defaultEndOfStreamMarker, 60 defaultAnnotationMarker, 61 defaultTimeUnitMarker, 62 ) 63 ) 64 65 // TimeBucket represents a bucket for encoding time values. 66 type TimeBucket struct { 67 min int64 68 max int64 69 opcode uint64 70 numOpcodeBits int 71 numValueBits int 72 } 73 74 // NewTimeBucket creates a new time bucket. 75 func NewTimeBucket(opcode uint64, numOpcodeBits, numValueBits int) TimeBucket { 76 return TimeBucket{ 77 opcode: opcode, 78 numOpcodeBits: numOpcodeBits, 79 numValueBits: numValueBits, 80 min: -(1 << uint(numValueBits-1)), 81 max: (1 << uint(numValueBits-1)) - 1, 82 } 83 } 84 85 // Opcode is the opcode prefix used to encode all time values in this range. 86 func (tb *TimeBucket) Opcode() uint64 { return tb.opcode } 87 88 // NumOpcodeBits is the number of bits used to write the opcode. 89 func (tb *TimeBucket) NumOpcodeBits() int { return tb.numOpcodeBits } 90 91 // Min is the minimum time value accepted in this range. 92 func (tb *TimeBucket) Min() int64 { return tb.min } 93 94 // Max is the maximum time value accepted in this range. 95 func (tb *TimeBucket) Max() int64 { return tb.max } 96 97 // NumValueBits is the number of bits used to write the time value. 98 func (tb *TimeBucket) NumValueBits() int { return tb.numValueBits } 99 100 // TimeEncodingScheme captures information related to time encoding. 101 type TimeEncodingScheme struct { 102 zeroBucket TimeBucket 103 buckets []TimeBucket 104 defaultBucket TimeBucket 105 } 106 107 // NewTimeEncodingSchemes converts the unit-to-scheme mapping 108 // to the underlying TimeEncodingSchemes used for lookups. 109 func NewTimeEncodingSchemes(schemes map[xtime.Unit]TimeEncodingScheme) TimeEncodingSchemes { 110 encodingSchemes := make(TimeEncodingSchemes, xtime.UnitCount()) 111 for k, v := range schemes { 112 if !k.IsValid() { 113 continue 114 } 115 116 encodingSchemes[k] = v 117 } 118 119 return encodingSchemes 120 } 121 122 // NewTimeEncodingScheme creates a new time encoding scheme. 123 // NB(xichen): numValueBitsForBuckets should be ordered by value 124 // in ascending order (smallest value first). 125 func NewTimeEncodingScheme(numValueBitsForBuckets []int, numValueBitsForDefault int) TimeEncodingScheme { 126 numBuckets := len(numValueBitsForBuckets) 127 buckets := make([]TimeBucket, 0, numBuckets) 128 numOpcodeBits := 1 129 opcode := uint64(0) 130 i := 0 131 for i < numBuckets { 132 opcode = uint64(1<<uint(i+1)) | opcode 133 buckets = append(buckets, NewTimeBucket(opcode, numOpcodeBits+1, numValueBitsForBuckets[i])) 134 i++ 135 numOpcodeBits++ 136 } 137 defaultBucket := NewTimeBucket(opcode|0x1, numOpcodeBits, numValueBitsForDefault) 138 139 return TimeEncodingScheme{ 140 zeroBucket: defaultZeroBucket, 141 buckets: buckets, 142 defaultBucket: defaultBucket, 143 } 144 } 145 146 // ZeroBucket is time bucket for encoding zero time values. 147 func (tes *TimeEncodingScheme) ZeroBucket() *TimeBucket { return &tes.zeroBucket } 148 149 // Buckets are the ordered time buckets used to encode non-zero, non-default time values. 150 func (tes *TimeEncodingScheme) Buckets() []TimeBucket { return tes.buckets } 151 152 // DefaultBucket is the time bucket for catching all other time values not included in the regular buckets. 153 func (tes *TimeEncodingScheme) DefaultBucket() *TimeBucket { return &tes.defaultBucket } 154 155 // TimeEncodingSchemes defines the time encoding schemes for different time units. 156 type TimeEncodingSchemes []TimeEncodingScheme 157 158 // SchemeForUnit returns the corresponding TimeEncodingScheme for the provided unit. 159 // Returns false if the unit does not match a scheme or is invalid. 160 func (s TimeEncodingSchemes) SchemeForUnit(u xtime.Unit) (*TimeEncodingScheme, bool) { 161 if !u.IsValid() || int(u) >= len(s) { 162 return nil, false 163 } 164 return &s[u], true 165 } 166 167 // Marker represents the markers. 168 type Marker byte 169 170 // MarkerEncodingScheme captures the information related to marker encoding. 171 type MarkerEncodingScheme struct { 172 opcode uint64 173 numOpcodeBits int 174 numValueBits int 175 endOfStream Marker 176 annotation Marker 177 timeUnit Marker 178 tails [256][8]checked.Bytes 179 } 180 181 // NewMarkerEncodingScheme returns new marker encoding. 182 func NewMarkerEncodingScheme( 183 opcode uint64, 184 numOpcodeBits int, 185 numValueBits int, 186 endOfStream Marker, 187 annotation Marker, 188 timeUnit Marker, 189 ) *MarkerEncodingScheme { 190 scheme := &MarkerEncodingScheme{ 191 opcode: opcode, 192 numOpcodeBits: numOpcodeBits, 193 numValueBits: numValueBits, 194 endOfStream: endOfStream, 195 annotation: annotation, 196 timeUnit: timeUnit, 197 } 198 // NB(r): we precompute all possible tail streams dependent on last byte 199 // so we never have to pool or allocate tails for each stream when we 200 // want to take a snapshot of the current stream returned by the `Stream` method. 201 for i := range scheme.tails { 202 for j := range scheme.tails[i] { 203 pos := j + 1 204 tmp := NewOStream(checked.NewBytes(nil, nil), false, nil) 205 tmp.WriteBits(uint64(i)>>uint(8-pos), pos) 206 WriteSpecialMarker(tmp, scheme, endOfStream) 207 rawBytes, _ := tmp.RawBytes() 208 tail := checked.NewBytes(rawBytes, nil) 209 scheme.tails[i][j] = tail 210 } 211 } 212 return scheme 213 } 214 215 // WriteSpecialMarker writes the marker that marks the start of a special symbol, 216 // e.g., the eos marker, the annotation marker, or the time unit marker. 217 func WriteSpecialMarker(os OStream, scheme *MarkerEncodingScheme, marker Marker) { 218 os.WriteBits(scheme.Opcode(), scheme.NumOpcodeBits()) 219 os.WriteBits(uint64(marker), scheme.NumValueBits()) 220 } 221 222 // Opcode returns the marker opcode. 223 func (mes *MarkerEncodingScheme) Opcode() uint64 { return mes.opcode } 224 225 // NumOpcodeBits returns the number of bits used for the opcode. 226 func (mes *MarkerEncodingScheme) NumOpcodeBits() int { return mes.numOpcodeBits } 227 228 // NumValueBits returns the number of bits used for the marker value. 229 func (mes *MarkerEncodingScheme) NumValueBits() int { return mes.numValueBits } 230 231 // EndOfStream returns the end of stream marker. 232 func (mes *MarkerEncodingScheme) EndOfStream() Marker { return mes.endOfStream } 233 234 // Annotation returns the annotation marker. 235 func (mes *MarkerEncodingScheme) Annotation() Marker { return mes.annotation } 236 237 // TimeUnit returns the time unit marker. 238 func (mes *MarkerEncodingScheme) TimeUnit() Marker { return mes.timeUnit } 239 240 // Tail will return the tail portion of a stream including the relevant bits 241 // in the last byte along with the end of stream marker. 242 func (mes *MarkerEncodingScheme) Tail(b byte, pos int) checked.Bytes { return mes.tails[int(b)][pos-1] }