github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/common/bitutil/compress.go (about)

     1  // This file is part of the go-sberex library. The go-sberex library is 
     2  // free software: you can redistribute it and/or modify it under the terms 
     3  // of the GNU Lesser General Public License as published by the Free 
     4  // Software Foundation, either version 3 of the License, or (at your option)
     5  // any later version.
     6  //
     7  // The go-sberex library is distributed in the hope that it will be useful, 
     8  // but WITHOUT ANY WARRANTY; without even the implied warranty of
     9  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser 
    10  // General Public License <http://www.gnu.org/licenses/> for more details.
    11  
    12  package bitutil
    13  
    14  import "errors"
    15  
    16  var (
    17  	// errMissingData is returned from decompression if the byte referenced by
    18  	// the bitset header overflows the input data.
    19  	errMissingData = errors.New("missing bytes on input")
    20  
    21  	// errUnreferencedData is returned from decompression if not all bytes were used
    22  	// up from the input data after decompressing it.
    23  	errUnreferencedData = errors.New("extra bytes on input")
    24  
    25  	// errExceededTarget is returned from decompression if the bitset header has
    26  	// more bits defined than the number of target buffer space available.
    27  	errExceededTarget = errors.New("target data size exceeded")
    28  
    29  	// errZeroContent is returned from decompression if a data byte referenced in
    30  	// the bitset header is actually a zero byte.
    31  	errZeroContent = errors.New("zero byte in input content")
    32  )
    33  
    34  // The compression algorithm implemented by CompressBytes and DecompressBytes is
    35  // optimized for sparse input data which contains a lot of zero bytes. Decompression
    36  // requires knowledge of the decompressed data length.
    37  //
    38  // Compression works as follows:
    39  //
    40  //   if data only contains zeroes,
    41  //       CompressBytes(data) == nil
    42  //   otherwise if len(data) <= 1,
    43  //       CompressBytes(data) == data
    44  //   otherwise:
    45  //       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
    46  //       where
    47  //         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
    48  //             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
    49  //             len(nonZeroBitset(data)) == (len(data)+7)/8
    50  //         nonZeroBytes(data) contains the non-zero bytes of data in the same order
    51  
    52  // CompressBytes compresses the input byte slice according to the sparse bitset
    53  // representation algorithm. If the result is bigger than the original input, no
    54  // compression is done.
    55  func CompressBytes(data []byte) []byte {
    56  	if out := bitsetEncodeBytes(data); len(out) < len(data) {
    57  		return out
    58  	}
    59  	cpy := make([]byte, len(data))
    60  	copy(cpy, data)
    61  	return cpy
    62  }
    63  
    64  // bitsetEncodeBytes compresses the input byte slice according to the sparse
    65  // bitset representation algorithm.
    66  func bitsetEncodeBytes(data []byte) []byte {
    67  	// Empty slices get compressed to nil
    68  	if len(data) == 0 {
    69  		return nil
    70  	}
    71  	// One byte slices compress to nil or retain the single byte
    72  	if len(data) == 1 {
    73  		if data[0] == 0 {
    74  			return nil
    75  		}
    76  		return data
    77  	}
    78  	// Calculate the bitset of set bytes, and gather the non-zero bytes
    79  	nonZeroBitset := make([]byte, (len(data)+7)/8)
    80  	nonZeroBytes := make([]byte, 0, len(data))
    81  
    82  	for i, b := range data {
    83  		if b != 0 {
    84  			nonZeroBytes = append(nonZeroBytes, b)
    85  			nonZeroBitset[i/8] |= 1 << byte(7-i%8)
    86  		}
    87  	}
    88  	if len(nonZeroBytes) == 0 {
    89  		return nil
    90  	}
    91  	return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...)
    92  }
    93  
    94  // DecompressBytes decompresses data with a known target size. If the input data
    95  // matches the size of the target, it means no compression was done in the first
    96  // place.
    97  func DecompressBytes(data []byte, target int) ([]byte, error) {
    98  	if len(data) > target {
    99  		return nil, errExceededTarget
   100  	}
   101  	if len(data) == target {
   102  		cpy := make([]byte, len(data))
   103  		copy(cpy, data)
   104  		return cpy, nil
   105  	}
   106  	return bitsetDecodeBytes(data, target)
   107  }
   108  
   109  // bitsetDecodeBytes decompresses data with a known target size.
   110  func bitsetDecodeBytes(data []byte, target int) ([]byte, error) {
   111  	out, size, err := bitsetDecodePartialBytes(data, target)
   112  	if err != nil {
   113  		return nil, err
   114  	}
   115  	if size != len(data) {
   116  		return nil, errUnreferencedData
   117  	}
   118  	return out, nil
   119  }
   120  
   121  // bitsetDecodePartialBytes decompresses data with a known target size, but does
   122  // not enforce consuming all the input bytes. In addition to the decompressed
   123  // output, the function returns the length of compressed input data corresponding
   124  // to the output as the input slice may be longer.
   125  func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) {
   126  	// Sanity check 0 targets to avoid infinite recursion
   127  	if target == 0 {
   128  		return nil, 0, nil
   129  	}
   130  	// Handle the zero and single byte corner cases
   131  	decomp := make([]byte, target)
   132  	if len(data) == 0 {
   133  		return decomp, 0, nil
   134  	}
   135  	if target == 1 {
   136  		decomp[0] = data[0] // copy to avoid referencing the input slice
   137  		if data[0] != 0 {
   138  			return decomp, 1, nil
   139  		}
   140  		return decomp, 0, nil
   141  	}
   142  	// Decompress the bitset of set bytes and distribute the non zero bytes
   143  	nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8)
   144  	if err != nil {
   145  		return nil, ptr, err
   146  	}
   147  	for i := 0; i < 8*len(nonZeroBitset); i++ {
   148  		if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 {
   149  			// Make sure we have enough data to push into the correct slot
   150  			if ptr >= len(data) {
   151  				return nil, 0, errMissingData
   152  			}
   153  			if i >= len(decomp) {
   154  				return nil, 0, errExceededTarget
   155  			}
   156  			// Make sure the data is valid and push into the slot
   157  			if data[ptr] == 0 {
   158  				return nil, 0, errZeroContent
   159  			}
   160  			decomp[i] = data[ptr]
   161  			ptr++
   162  		}
   163  	}
   164  	return decomp, ptr, nil
   165  }