github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/pkg/segment/writer/metrics/compress/compressor.go (about)

     1  /*
     2  Copyright 2023.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package compress
    18  
    19  import (
    20  	"fmt"
    21  	"io"
    22  	"math"
    23  )
    24  
const (
	// firstDeltaBits is the bit width used to store the delta between the
	// header and the first timestamp of a block. The finish marker written
	// when no sample was compressed uses the same width.
	firstDeltaBits = 14
)
    28  
// Compressor compresses time-series data based on Facebook's paper.
// Link to the paper: https://www.vldb.org/pvldb/vol8/p1816-teller.pdf
//
// It keeps the state of the previously compressed sample so that the next
// sample can be encoded relative to it.
type Compressor struct {
	bw            *bitWriter // sink that receives every encoded bit
	header        int32      // header given to NewCompressor; base for the first timestamp delta
	t             int32      // last compressed timestamp; 0 is treated as "no sample compressed yet"
	tDelta        int32      // last timestamp delta (for the first sample: its absolute distance from header)
	leadingZeros  uint8      // leading-zero count of the XOR window last stored by compressValue
	trailingZeros uint8      // trailing-zero count of the XOR window last stored by compressValue
	value         uint64     // IEEE 754 bit pattern of the last compressed value
}
    40  
    41  // NewCompressor initialize Compressor and returns a function to be invoked
    42  // at the end of compressing.
    43  func NewCompressor(w io.Writer, header uint32) (c *Compressor, finish func() error, err error) {
    44  	c = &Compressor{
    45  		header:       int32(header),
    46  		bw:           newBitWriter(w),
    47  		leadingZeros: math.MaxUint8,
    48  	}
    49  	if err := c.bw.writeBits(uint64(header), 32); err != nil {
    50  		return nil, nil, fmt.Errorf("failed to write header: %w", err)
    51  	}
    52  	return c, c.finish, nil
    53  }
    54  
    55  // Compress compresses time-series data and write.
    56  func (c *Compressor) Compress(t uint32, v float64) (uint64, error) {
    57  	// First time to compress.
    58  	if c.t == 0 {
    59  		var delta int32
    60  		if int32(t)-c.header < 0 {
    61  			delta = c.header - int32(t)
    62  		} else {
    63  			delta = int32(t) - c.header
    64  		}
    65  		c.t = int32(t)
    66  		c.tDelta = delta
    67  		c.value = math.Float64bits(v)
    68  
    69  		if err := c.bw.writeBits(uint64(delta), firstDeltaBits); err != nil {
    70  			return 0, fmt.Errorf("failed to write first timestamp: %w", err)
    71  		}
    72  		// The first value is stored with no compression.
    73  		if err := c.bw.writeBits(c.value, 64); err != nil {
    74  			return 0, fmt.Errorf("failed to write first value: %w", err)
    75  		}
    76  		writtenBytes := uint64(math.Round((firstDeltaBits + 64) / 8))
    77  		return writtenBytes, nil
    78  	}
    79  	return c.compress(t, v)
    80  }
    81  
    82  func (c *Compressor) compress(t uint32, v float64) (uint64, error) {
    83  
    84  	var writtenBits uint64
    85  	tsSize, err := c.compressTimestamp(t)
    86  	writtenBits += tsSize
    87  	if err != nil {
    88  		return 0, fmt.Errorf("failed to compress timestamp: %w", err)
    89  	}
    90  
    91  	valSize, err := c.compressValue(v)
    92  	writtenBits += valSize
    93  	if err != nil {
    94  		return 0, fmt.Errorf("failed to compress value: %w", err)
    95  	}
    96  
    97  	writtenBytes := uint64(math.Round(float64(writtenBits) / 8))
    98  	return writtenBytes, nil
    99  }
   100  
   101  // returns number of bits written or any errors
   102  func (c *Compressor) compressTimestamp(t uint32) (uint64, error) {
   103  	delta := int32(t) - c.t
   104  	dod := int64(delta) - int64(c.tDelta) // delta of delta
   105  	c.t = int32(t)
   106  	c.tDelta = delta
   107  
   108  	var writtenBits uint64
   109  
   110  	// | DoD         | Header value | Value bits | Total bits |
   111  	// |-------------|------------- |------------|------------|
   112  	// | 0           | 0            | 0          | 1          |
   113  	// | -63, 64     | 10           | 7          | 9          |
   114  	// | -255, 256   | 110          | 9          | 12         |
   115  	// | -2047, 2048 | 1110         | 12         | 16         |
   116  	// | > 2048      | 1111         | 32         | 36         |
   117  	switch {
   118  	case dod == 0:
   119  		if err := c.bw.writeBit(zero); err != nil {
   120  			return 0, fmt.Errorf("failed to write timestamp zero: %w", err)
   121  		}
   122  		writtenBits++
   123  	case -63 <= dod && dod <= 64:
   124  		// 0x02 == '10'
   125  		if err := c.bw.writeBits(0x02, 2); err != nil {
   126  			return 0, fmt.Errorf("failed to write 2 bits header: %w", err)
   127  		}
   128  		if err := writeInt64Bits(c.bw, dod, 7); err != nil {
   129  			return 0, fmt.Errorf("failed to write 7 bits dod: %w", err)
   130  		}
   131  		writtenBits += 9
   132  	case -255 <= dod && dod <= 256:
   133  		// 0x06 == '110'
   134  		if err := c.bw.writeBits(0x06, 3); err != nil {
   135  			return 0, fmt.Errorf("failed to write 3 bits header: %w", err)
   136  		}
   137  		if err := writeInt64Bits(c.bw, dod, 9); err != nil {
   138  			return 0, fmt.Errorf("failed to write 9 bits dod: %w", err)
   139  		}
   140  		writtenBits += 12
   141  	case -2047 <= dod && dod <= 2048:
   142  		// 0x0E == '1110'
   143  		if err := c.bw.writeBits(0x0E, 4); err != nil {
   144  			return 0, fmt.Errorf("failed to write 4 bits header: %w", err)
   145  		}
   146  		if err := writeInt64Bits(c.bw, dod, 12); err != nil {
   147  			return 0, fmt.Errorf("failed to write 12 bits dod: %w", err)
   148  		}
   149  		writtenBits += 16
   150  	default:
   151  		// 0x0F == '1111'
   152  		if err := c.bw.writeBits(0x0F, 4); err != nil {
   153  			return 0, fmt.Errorf("failed to write 4 bits header: %w", err)
   154  		}
   155  		if err := writeInt64Bits(c.bw, dod, 32); err != nil {
   156  			return 0, fmt.Errorf("failed to write 32 bits dod: %w", err)
   157  		}
   158  		writtenBits += 36
   159  	}
   160  
   161  	return writtenBits, nil
   162  }
   163  
   164  func writeInt64Bits(bw *bitWriter, i int64, nbits uint) error {
   165  	var u uint64
   166  	if i >= 0 || nbits >= 64 {
   167  		u = uint64(i)
   168  	} else {
   169  		u = uint64(1<<nbits + i)
   170  	}
   171  	return bw.writeBits(u, int(nbits))
   172  }
   173  
   174  // returns number of bits written or any errors
   175  func (c *Compressor) compressValue(v float64) (uint64, error) {
   176  	value := math.Float64bits(v)
   177  	xor := c.value ^ value
   178  	c.value = value
   179  
   180  	var writtenBits uint64
   181  
   182  	// Value is the same as previous.
   183  	if xor == 0 {
   184  		return 1, c.bw.writeBit(zero)
   185  	}
   186  
   187  	leadingZeros := leardingZeros(xor)
   188  	trailingZeros := trailingZeros(xor)
   189  
   190  	if err := c.bw.writeBit(one); err != nil {
   191  		return 0, fmt.Errorf("failed to write one bit: %w", err)
   192  	}
   193  	writtenBits++
   194  
   195  	// If the block of meaningful bits falls within the block of previous meaningful bits,
   196  	// i.c., there are at least as many leading zeros and as many trailing zeros as with the previous value
   197  	// use that information for the block position and just store the meaningful XORed valuc.
   198  	if c.leadingZeros <= leadingZeros && c.trailingZeros <= trailingZeros {
   199  		if err := c.bw.writeBit(zero); err != nil {
   200  			return 0, fmt.Errorf("failed to write zero bit: %w", err)
   201  		}
   202  		significantBits := int(64 - c.leadingZeros - c.trailingZeros)
   203  		if err := c.bw.writeBits(xor>>c.trailingZeros, significantBits); err != nil {
   204  			return 0, fmt.Errorf("failed to write xor value: %w", err)
   205  		}
   206  		writtenBits += (uint64(significantBits + 1))
   207  		return writtenBits, nil
   208  	}
   209  
   210  	c.leadingZeros = leadingZeros
   211  	c.trailingZeros = trailingZeros
   212  
   213  	if err := c.bw.writeBit(one); err != nil {
   214  		return 0, fmt.Errorf("failed to write one bit: %w", err)
   215  	}
   216  	if err := c.bw.writeBits(uint64(leadingZeros), 5); err != nil {
   217  		return 0, fmt.Errorf("failed to write leading zeros: %w", err)
   218  	}
   219  	writtenBits += 6
   220  
   221  	// Note that if leading == trailing == 0, then sigbits == 64.
   222  	// But that value doesn't actually fit into the 6 bits we havc.
   223  	// Luckily, we never need to encode 0 significant bits,
   224  	// since that would put us in the other case (vDelta == 0).
   225  	// So instead we write out a 0 and adjust it back to 64 on unpacking.
   226  	significantBits := 64 - leadingZeros - trailingZeros
   227  	if err := c.bw.writeBits(uint64(significantBits), 6); err != nil {
   228  		return 0, fmt.Errorf("failed to write significant bits: %w", err)
   229  	}
   230  	if err := c.bw.writeBits(xor>>c.trailingZeros, int(significantBits)); err != nil {
   231  		return 0, fmt.Errorf("failed to write xor value")
   232  	}
   233  	writtenBits += (6 + uint64(significantBits))
   234  	return writtenBits, nil
   235  }
   236  
   237  func leardingZeros(v uint64) uint8 {
   238  	var mask uint64 = 0x8000000000000000
   239  	var ret uint8 = 0
   240  	for ; ret < 64 && v&mask == 0; ret++ {
   241  		mask >>= 1
   242  	}
   243  	return ret
   244  }
   245  
   246  func trailingZeros(v uint64) uint8 {
   247  	var mask uint64 = 0x0000000000000001
   248  	var ret uint8 = 0
   249  	for ; ret < 64 && v&mask == 0; ret++ {
   250  		mask <<= 1
   251  	}
   252  	return ret
   253  }
   254  
   255  // finish compresses the finish marker and flush bits with zero bits padding for byte-align.
   256  func (c *Compressor) finish() error {
   257  	if c.t == 0 {
   258  		// Add finish marker with delta = 0x3FFF (firstDeltaBits = 14 bits), and first value = 0
   259  		err := c.bw.writeBits(1<<firstDeltaBits-1, firstDeltaBits)
   260  		if err != nil {
   261  			return err
   262  		}
   263  		err = c.bw.writeBits(0, 64)
   264  		if err != nil {
   265  			return err
   266  		}
   267  		return c.bw.flush(zero)
   268  	}
   269  
   270  	// Add finish marker with deltaOfDelta = 0xFFFFFFFF, and value xor = 0
   271  	err := c.bw.writeBits(0x0F, 4)
   272  	if err != nil {
   273  		return err
   274  	}
   275  	err = c.bw.writeBits(0xFFFFFFFF, 32)
   276  	if err != nil {
   277  		return err
   278  	}
   279  	err = c.bw.writeBit(zero)
   280  	if err != nil {
   281  		return err
   282  	}
   283  	return c.bw.flush(zero)
   284  }