gitlab.com/aquachain/aquachain@v1.17.16-rc3.0.20221018032414-e3ddf1e1c055/common/bitutil/compress.go (about)

     1  // Copyright 2018 The aquachain Authors
     2  // This file is part of the aquachain library.
     3  //
     4  // The aquachain library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The aquachain library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the aquachain library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package bitutil
    18  
    19  import "errors"
    20  
    21  var (
    22  	// errMissingData is returned from decompression if the byte referenced by
    23  	// the bitset header overflows the input data.
    24  	errMissingData = errors.New("missing bytes on input")
    25  
    26  	// errUnreferencedData is returned from decompression if not all bytes were used
    27  	// up from the input data after decompressing it.
    28  	errUnreferencedData = errors.New("extra bytes on input")
    29  
    30  	// errExceededTarget is returned from decompression if the bitset header has
    31  	// more bits defined than the number of target buffer space available.
    32  	errExceededTarget = errors.New("target data size exceeded")
    33  
    34  	// errZeroContent is returned from decompression if a data byte referenced in
    35  	// the bitset header is actually a zero byte.
    36  	errZeroContent = errors.New("zero byte in input content")
    37  )
    38  
    39  // The compression algorithm implemented by CompressBytes and DecompressBytes is
    40  // optimized for sparse input data which contains a lot of zero bytes. Decompression
    41  // requires knowledge of the decompressed data length.
    42  //
    43  // Compression works as follows:
    44  //
    45  //   if data only contains zeroes,
    46  //       CompressBytes(data) == nil
    47  //   otherwise if len(data) <= 1,
    48  //       CompressBytes(data) == data
    49  //   otherwise:
    50  //       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
    51  //       where
    52  //         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
    53  //             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
    54  //             len(nonZeroBitset(data)) == (len(data)+7)/8
    55  //         nonZeroBytes(data) contains the non-zero bytes of data in the same order
    56  
    57  // CompressBytes compresses the input byte slice according to the sparse bitset
    58  // representation algorithm. If the result is bigger than the original input, no
    59  // compression is done.
    60  func CompressBytes(data []byte) []byte {
    61  	if out := bitsetEncodeBytes(data); len(out) < len(data) {
    62  		return out
    63  	}
    64  	cpy := make([]byte, len(data))
    65  	copy(cpy, data)
    66  	return cpy
    67  }
    68  
    69  // bitsetEncodeBytes compresses the input byte slice according to the sparse
    70  // bitset representation algorithm.
    71  func bitsetEncodeBytes(data []byte) []byte {
    72  	// Empty slices get compressed to nil
    73  	if len(data) == 0 {
    74  		return nil
    75  	}
    76  	// One byte slices compress to nil or retain the single byte
    77  	if len(data) == 1 {
    78  		if data[0] == 0 {
    79  			return nil
    80  		}
    81  		return data
    82  	}
    83  	// Calculate the bitset of set bytes, and gather the non-zero bytes
    84  	nonZeroBitset := make([]byte, (len(data)+7)/8)
    85  	nonZeroBytes := make([]byte, 0, len(data))
    86  
    87  	for i, b := range data {
    88  		if b != 0 {
    89  			nonZeroBytes = append(nonZeroBytes, b)
    90  			nonZeroBitset[i/8] |= 1 << byte(7-i%8)
    91  		}
    92  	}
    93  	if len(nonZeroBytes) == 0 {
    94  		return nil
    95  	}
    96  	return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...)
    97  }
    98  
    99  // DecompressBytes decompresses data with a known target size. If the input data
   100  // matches the size of the target, it means no compression was done in the first
   101  // place.
   102  func DecompressBytes(data []byte, target int) ([]byte, error) {
   103  	if len(data) > target {
   104  		return nil, errExceededTarget
   105  	}
   106  	if len(data) == target {
   107  		cpy := make([]byte, len(data))
   108  		copy(cpy, data)
   109  		return cpy, nil
   110  	}
   111  	return bitsetDecodeBytes(data, target)
   112  }
   113  
   114  // bitsetDecodeBytes decompresses data with a known target size.
   115  func bitsetDecodeBytes(data []byte, target int) ([]byte, error) {
   116  	out, size, err := bitsetDecodePartialBytes(data, target)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  	if size != len(data) {
   121  		return nil, errUnreferencedData
   122  	}
   123  	return out, nil
   124  }
   125  
   126  // bitsetDecodePartialBytes decompresses data with a known target size, but does
   127  // not enforce consuming all the input bytes. In addition to the decompressed
   128  // output, the function returns the length of compressed input data corresponding
   129  // to the output as the input slice may be longer.
   130  func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) {
   131  	// Sanity check 0 targets to avoid infinite recursion
   132  	if target == 0 {
   133  		return nil, 0, nil
   134  	}
   135  	// Handle the zero and single byte corner cases
   136  	decomp := make([]byte, target)
   137  	if len(data) == 0 {
   138  		return decomp, 0, nil
   139  	}
   140  	if target == 1 {
   141  		decomp[0] = data[0] // copy to avoid referencing the input slice
   142  		if data[0] != 0 {
   143  			return decomp, 1, nil
   144  		}
   145  		return decomp, 0, nil
   146  	}
   147  	// Decompress the bitset of set bytes and distribute the non zero bytes
   148  	nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8)
   149  	if err != nil {
   150  		return nil, ptr, err
   151  	}
   152  	for i := 0; i < 8*len(nonZeroBitset); i++ {
   153  		if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 {
   154  			// Make sure we have enough data to push into the correct slot
   155  			if ptr >= len(data) {
   156  				return nil, 0, errMissingData
   157  			}
   158  			if i >= len(decomp) {
   159  				return nil, 0, errExceededTarget
   160  			}
   161  			// Make sure the data is valid and push into the slot
   162  			if data[ptr] == 0 {
   163  				return nil, 0, errZeroContent
   164  			}
   165  			decomp[i] = data[ptr]
   166  			ptr++
   167  		}
   168  	}
   169  	return decomp, ptr, nil
   170  }