github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/metrics/compress/compressor.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package compress 18 19 import ( 20 "fmt" 21 "io" 22 "math" 23 ) 24 25 const ( 26 firstDeltaBits = 14 27 ) 28 29 // Compressor compresses time-series data based on Facebook's paper. 30 // Link to the paper: https://www.vldb.org/pvldb/vol8/p1816-teller.pdf 31 type Compressor struct { 32 bw *bitWriter 33 header int32 34 t int32 35 tDelta int32 36 leadingZeros uint8 37 trailingZeros uint8 38 value uint64 39 } 40 41 // NewCompressor initialize Compressor and returns a function to be invoked 42 // at the end of compressing. 43 func NewCompressor(w io.Writer, header uint32) (c *Compressor, finish func() error, err error) { 44 c = &Compressor{ 45 header: int32(header), 46 bw: newBitWriter(w), 47 leadingZeros: math.MaxUint8, 48 } 49 if err := c.bw.writeBits(uint64(header), 32); err != nil { 50 return nil, nil, fmt.Errorf("failed to write header: %w", err) 51 } 52 return c, c.finish, nil 53 } 54 55 // Compress compresses time-series data and write. 56 func (c *Compressor) Compress(t uint32, v float64) (uint64, error) { 57 // First time to compress. 58 if c.t == 0 { 59 var delta int32 60 if int32(t)-c.header < 0 { 61 delta = c.header - int32(t) 62 } else { 63 delta = int32(t) - c.header 64 } 65 c.t = int32(t) 66 c.tDelta = delta 67 c.value = math.Float64bits(v) 68 69 if err := c.bw.writeBits(uint64(delta), firstDeltaBits); err != nil { 70 return 0, fmt.Errorf("failed to write first timestamp: %w", err) 71 } 72 // The first value is stored with no compression. 73 if err := c.bw.writeBits(c.value, 64); err != nil { 74 return 0, fmt.Errorf("failed to write first value: %w", err) 75 } 76 writtenBytes := uint64(math.Round((firstDeltaBits + 64) / 8)) 77 return writtenBytes, nil 78 } 79 return c.compress(t, v) 80 } 81 82 func (c *Compressor) compress(t uint32, v float64) (uint64, error) { 83 84 var writtenBits uint64 85 tsSize, err := c.compressTimestamp(t) 86 writtenBits += tsSize 87 if err != nil { 88 return 0, fmt.Errorf("failed to compress timestamp: %w", err) 89 } 90 91 valSize, err := c.compressValue(v) 92 writtenBits += valSize 93 if err != nil { 94 return 0, fmt.Errorf("failed to compress value: %w", err) 95 } 96 97 writtenBytes := uint64(math.Round(float64(writtenBits) / 8)) 98 return writtenBytes, nil 99 } 100 101 // returns number of bits written or any errors 102 func (c *Compressor) compressTimestamp(t uint32) (uint64, error) { 103 delta := int32(t) - c.t 104 dod := int64(delta) - int64(c.tDelta) // delta of delta 105 c.t = int32(t) 106 c.tDelta = delta 107 108 var writtenBits uint64 109 110 // | DoD | Header value | Value bits | Total bits | 111 // |-------------|------------- |------------|------------| 112 // | 0 | 0 | 0 | 1 | 113 // | -63, 64 | 10 | 7 | 9 | 114 // | -255, 256 | 110 | 9 | 12 | 115 // | -2047, 2048 | 1110 | 12 | 16 | 116 // | > 2048 | 1111 | 32 | 36 | 117 switch { 118 case dod == 0: 119 if err := c.bw.writeBit(zero); err != nil { 120 return 0, fmt.Errorf("failed to write timestamp zero: %w", err) 121 } 122 writtenBits++ 123 case -63 <= dod && dod <= 64: 124 // 0x02 == '10' 125 if err := c.bw.writeBits(0x02, 2); err != nil { 126 return 0, fmt.Errorf("failed to write 2 bits header: %w", err) 127 } 128 if err := writeInt64Bits(c.bw, dod, 7); err != nil { 129 return 0, fmt.Errorf("failed to write 7 bits dod: %w", err) 130 } 131 writtenBits += 9 132 case -255 <= dod && dod <= 256: 133 // 0x06 == '110' 134 if err := c.bw.writeBits(0x06, 3); err != nil { 135 return 0, fmt.Errorf("failed to write 3 bits header: %w", err) 136 } 137 if err := writeInt64Bits(c.bw, dod, 9); err != nil { 138 return 0, fmt.Errorf("failed to write 9 bits dod: %w", err) 139 } 140 writtenBits += 12 141 case -2047 <= dod && dod <= 2048: 142 // 0x0E == '1110' 143 if err := c.bw.writeBits(0x0E, 4); err != nil { 144 return 0, fmt.Errorf("failed to write 4 bits header: %w", err) 145 } 146 if err := writeInt64Bits(c.bw, dod, 12); err != nil { 147 return 0, fmt.Errorf("failed to write 12 bits dod: %w", err) 148 } 149 writtenBits += 16 150 default: 151 // 0x0F == '1111' 152 if err := c.bw.writeBits(0x0F, 4); err != nil { 153 return 0, fmt.Errorf("failed to write 4 bits header: %w", err) 154 } 155 if err := writeInt64Bits(c.bw, dod, 32); err != nil { 156 return 0, fmt.Errorf("failed to write 32 bits dod: %w", err) 157 } 158 writtenBits += 36 159 } 160 161 return writtenBits, nil 162 } 163 164 func writeInt64Bits(bw *bitWriter, i int64, nbits uint) error { 165 var u uint64 166 if i >= 0 || nbits >= 64 { 167 u = uint64(i) 168 } else { 169 u = uint64(1<<nbits + i) 170 } 171 return bw.writeBits(u, int(nbits)) 172 } 173 174 // returns number of bits written or any errors 175 func (c *Compressor) compressValue(v float64) (uint64, error) { 176 value := math.Float64bits(v) 177 xor := c.value ^ value 178 c.value = value 179 180 var writtenBits uint64 181 182 // Value is the same as previous. 183 if xor == 0 { 184 return 1, c.bw.writeBit(zero) 185 } 186 187 leadingZeros := leardingZeros(xor) 188 trailingZeros := trailingZeros(xor) 189 190 if err := c.bw.writeBit(one); err != nil { 191 return 0, fmt.Errorf("failed to write one bit: %w", err) 192 } 193 writtenBits++ 194 195 // If the block of meaningful bits falls within the block of previous meaningful bits, 196 // i.c., there are at least as many leading zeros and as many trailing zeros as with the previous value 197 // use that information for the block position and just store the meaningful XORed valuc. 198 if c.leadingZeros <= leadingZeros && c.trailingZeros <= trailingZeros { 199 if err := c.bw.writeBit(zero); err != nil { 200 return 0, fmt.Errorf("failed to write zero bit: %w", err) 201 } 202 significantBits := int(64 - c.leadingZeros - c.trailingZeros) 203 if err := c.bw.writeBits(xor>>c.trailingZeros, significantBits); err != nil { 204 return 0, fmt.Errorf("failed to write xor value: %w", err) 205 } 206 writtenBits += (uint64(significantBits + 1)) 207 return writtenBits, nil 208 } 209 210 c.leadingZeros = leadingZeros 211 c.trailingZeros = trailingZeros 212 213 if err := c.bw.writeBit(one); err != nil { 214 return 0, fmt.Errorf("failed to write one bit: %w", err) 215 } 216 if err := c.bw.writeBits(uint64(leadingZeros), 5); err != nil { 217 return 0, fmt.Errorf("failed to write leading zeros: %w", err) 218 } 219 writtenBits += 6 220 221 // Note that if leading == trailing == 0, then sigbits == 64. 222 // But that value doesn't actually fit into the 6 bits we havc. 223 // Luckily, we never need to encode 0 significant bits, 224 // since that would put us in the other case (vDelta == 0). 225 // So instead we write out a 0 and adjust it back to 64 on unpacking. 226 significantBits := 64 - leadingZeros - trailingZeros 227 if err := c.bw.writeBits(uint64(significantBits), 6); err != nil { 228 return 0, fmt.Errorf("failed to write significant bits: %w", err) 229 } 230 if err := c.bw.writeBits(xor>>c.trailingZeros, int(significantBits)); err != nil { 231 return 0, fmt.Errorf("failed to write xor value") 232 } 233 writtenBits += (6 + uint64(significantBits)) 234 return writtenBits, nil 235 } 236 237 func leardingZeros(v uint64) uint8 { 238 var mask uint64 = 0x8000000000000000 239 var ret uint8 = 0 240 for ; ret < 64 && v&mask == 0; ret++ { 241 mask >>= 1 242 } 243 return ret 244 } 245 246 func trailingZeros(v uint64) uint8 { 247 var mask uint64 = 0x0000000000000001 248 var ret uint8 = 0 249 for ; ret < 64 && v&mask == 0; ret++ { 250 mask <<= 1 251 } 252 return ret 253 } 254 255 // finish compresses the finish marker and flush bits with zero bits padding for byte-align. 256 func (c *Compressor) finish() error { 257 if c.t == 0 { 258 // Add finish marker with delta = 0x3FFF (firstDeltaBits = 14 bits), and first value = 0 259 err := c.bw.writeBits(1<<firstDeltaBits-1, firstDeltaBits) 260 if err != nil { 261 return err 262 } 263 err = c.bw.writeBits(0, 64) 264 if err != nil { 265 return err 266 } 267 return c.bw.flush(zero) 268 } 269 270 // Add finish marker with deltaOfDelta = 0xFFFFFFFF, and value xor = 0 271 err := c.bw.writeBits(0x0F, 4) 272 if err != nil { 273 return err 274 } 275 err = c.bw.writeBits(0xFFFFFFFF, 32) 276 if err != nil { 277 return err 278 } 279 err = c.bw.writeBit(zero) 280 if err != nil { 281 return err 282 } 283 return c.bw.flush(zero) 284 }