github.com/arieschain/arieschain@v0.0.0-20191023063405-37c074544356/common/bitutil/compress.go (about)

     1  package bitutil
     2  
     3  import "errors"
     4  
     5  var (
     6  	// errMissingData is returned from decompression if the byte referenced by
     7  	// the bitset header overflows the input data.
     8  	errMissingData = errors.New("missing bytes on input")
     9  
    10  	// errUnreferencedData is returned from decompression if not all bytes were used
    11  	// up from the input data after decompressing it.
    12  	errUnreferencedData = errors.New("extra bytes on input")
    13  
    14  	// errExceededTarget is returned from decompression if the bitset header has
    15  	// more bits defined than the number of target buffer space available.
    16  	errExceededTarget = errors.New("target data size exceeded")
    17  
    18  	// errZeroContent is returned from decompression if a data byte referenced in
    19  	// the bitset header is actually a zero byte.
    20  	errZeroContent = errors.New("zero byte in input content")
    21  )
    22  
    23  // The compression algorithm implemented by CompressBytes and DecompressBytes is
    24  // optimized for sparse input data which contains a lot of zero bytes. Decompression
    25  // requires knowledge of the decompressed data length.
    26  //
    27  // Compression works as follows:
    28  //
    29  //   if data only contains zeroes,
    30  //       CompressBytes(data) == nil
    31  //   otherwise if len(data) <= 1,
    32  //       CompressBytes(data) == data
    33  //   otherwise:
    34  //       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
    35  //       where
    36  //         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
    37  //             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
    38  //             len(nonZeroBitset(data)) == (len(data)+7)/8
    39  //         nonZeroBytes(data) contains the non-zero bytes of data in the same order
    40  
    41  // CompressBytes compresses the input byte slice according to the sparse bitset
    42  // representation algorithm. If the result is bigger than the original input, no
    43  // compression is done.
    44  func CompressBytes(data []byte) []byte {
    45  	if out := bitsetEncodeBytes(data); len(out) < len(data) {
    46  		return out
    47  	}
    48  	cpy := make([]byte, len(data))
    49  	copy(cpy, data)
    50  	return cpy
    51  }
    52  
    53  // bitsetEncodeBytes compresses the input byte slice according to the sparse
    54  // bitset representation algorithm.
    55  func bitsetEncodeBytes(data []byte) []byte {
    56  	// Empty slices get compressed to nil
    57  	if len(data) == 0 {
    58  		return nil
    59  	}
    60  	// One byte slices compress to nil or retain the single byte
    61  	if len(data) == 1 {
    62  		if data[0] == 0 {
    63  			return nil
    64  		}
    65  		return data
    66  	}
    67  	// Calculate the bitset of set bytes, and gather the non-zero bytes
    68  	nonZeroBitset := make([]byte, (len(data)+7)/8)
    69  	nonZeroBytes := make([]byte, 0, len(data))
    70  
    71  	for i, b := range data {
    72  		if b != 0 {
    73  			nonZeroBytes = append(nonZeroBytes, b)
    74  			nonZeroBitset[i/8] |= 1 << byte(7-i%8)
    75  		}
    76  	}
    77  	if len(nonZeroBytes) == 0 {
    78  		return nil
    79  	}
    80  	return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...)
    81  }
    82  
    83  // DecompressBytes decompresses data with a known target size. If the input data
    84  // matches the size of the target, it means no compression was done in the first
    85  // place.
    86  func DecompressBytes(data []byte, target int) ([]byte, error) {
    87  	if len(data) > target {
    88  		return nil, errExceededTarget
    89  	}
    90  	if len(data) == target {
    91  		cpy := make([]byte, len(data))
    92  		copy(cpy, data)
    93  		return cpy, nil
    94  	}
    95  	return bitsetDecodeBytes(data, target)
    96  }
    97  
    98  // bitsetDecodeBytes decompresses data with a known target size.
    99  func bitsetDecodeBytes(data []byte, target int) ([]byte, error) {
   100  	out, size, err := bitsetDecodePartialBytes(data, target)
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  	if size != len(data) {
   105  		return nil, errUnreferencedData
   106  	}
   107  	return out, nil
   108  }
   109  
   110  // bitsetDecodePartialBytes decompresses data with a known target size, but does
   111  // not enforce consuming all the input bytes. In addition to the decompressed
   112  // output, the function returns the length of compressed input data corresponding
   113  // to the output as the input slice may be longer.
   114  func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) {
   115  	// Sanity check 0 targets to avoid infinite recursion
   116  	if target == 0 {
   117  		return nil, 0, nil
   118  	}
   119  	// Handle the zero and single byte corner cases
   120  	decomp := make([]byte, target)
   121  	if len(data) == 0 {
   122  		return decomp, 0, nil
   123  	}
   124  	if target == 1 {
   125  		decomp[0] = data[0] // copy to avoid referencing the input slice
   126  		if data[0] != 0 {
   127  			return decomp, 1, nil
   128  		}
   129  		return decomp, 0, nil
   130  	}
   131  	// Decompress the bitset of set bytes and distribute the non zero bytes
   132  	nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8)
   133  	if err != nil {
   134  		return nil, ptr, err
   135  	}
   136  	for i := 0; i < 8*len(nonZeroBitset); i++ {
   137  		if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 {
   138  			// Make sure we have enough data to push into the correct slot
   139  			if ptr >= len(data) {
   140  				return nil, 0, errMissingData
   141  			}
   142  			if i >= len(decomp) {
   143  				return nil, 0, errExceededTarget
   144  			}
   145  			// Make sure the data is valid and push into the slot
   146  			if data[ptr] == 0 {
   147  				return nil, 0, errZeroContent
   148  			}
   149  			decomp[i] = data[ptr]
   150  			ptr++
   151  		}
   152  	}
   153  	return decomp, ptr, nil
   154  }