github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/common/bitutil/compress.go (about)

     1  //  Copyright 2018 The go-ethereum Authors
     2  //  Copyright 2019 The go-aigar Authors
     3  //  This file is part of the go-aigar library.
     4  //
     5  //  The go-aigar library is free software: you can redistribute it and/or modify
     6  //  it under the terms of the GNU Lesser General Public License as published by
     7  //  the Free Software Foundation, either version 3 of the License, or
     8  //  (at your option) any later version.
     9  //
    10  //  The go-aigar library is distributed in the hope that it will be useful,
    11  //  but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  //  GNU Lesser General Public License for more details.
    14  //
    15  //  You should have received a copy of the GNU Lesser General Public License
    16  //  along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package bitutil
    19  
    20  import "errors"
    21  
    22  var (
    23  	// errMissingData is returned from decompression if the byte referenced by
    24  	// the bitset header overflows the input data.
    25  	errMissingData = errors.New("missing bytes on input")
    26  
    27  	// errUnreferencedData is returned from decompression if not all bytes were used
    28  	// up from the input data after decompressing it.
    29  	errUnreferencedData = errors.New("extra bytes on input")
    30  
    31  	// errExceededTarget is returned from decompression if the bitset header has
    32  	// more bits defined than the number of target buffer space available.
    33  	errExceededTarget = errors.New("target data size exceeded")
    34  
    35  	// errZeroContent is returned from decompression if a data byte referenced in
    36  	// the bitset header is actually a zero byte.
    37  	errZeroContent = errors.New("zero byte in input content")
    38  )
    39  
    40  // The compression algorithm implemented by CompressBytes and DecompressBytes is
    41  // optimized for sparse input data which contains a lot of zero bytes. Decompression
    42  // requires knowledge of the decompressed data length.
    43  //
    44  // Compression works as follows:
    45  //
    46  //   if data only contains zeroes,
    47  //       CompressBytes(data) == nil
    48  //   otherwise if len(data) <= 1,
    49  //       CompressBytes(data) == data
    50  //   otherwise:
    51  //       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
    52  //       where
    53  //         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
//             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
    55  //             len(nonZeroBitset(data)) == (len(data)+7)/8
    56  //         nonZeroBytes(data) contains the non-zero bytes of data in the same order
    57  
    58  // CompressBytes compresses the input byte slice according to the sparse bitset
    59  // representation algorithm. If the result is bigger than the original input, no
    60  // compression is done.
    61  func CompressBytes(data []byte) []byte {
    62  	if out := bitsetEncodeBytes(data); len(out) < len(data) {
    63  		return out
    64  	}
    65  	cpy := make([]byte, len(data))
    66  	copy(cpy, data)
    67  	return cpy
    68  }
    69  
    70  // bitsetEncodeBytes compresses the input byte slice according to the sparse
    71  // bitset representation algorithm.
    72  func bitsetEncodeBytes(data []byte) []byte {
    73  	// Empty slices get compressed to nil
    74  	if len(data) == 0 {
    75  		return nil
    76  	}
    77  	// One byte slices compress to nil or retain the single byte
    78  	if len(data) == 1 {
    79  		if data[0] == 0 {
    80  			return nil
    81  		}
    82  		return data
    83  	}
    84  	// Calculate the bitset of set bytes, and gather the non-zero bytes
    85  	nonZeroBitset := make([]byte, (len(data)+7)/8)
    86  	nonZeroBytes := make([]byte, 0, len(data))
    87  
    88  	for i, b := range data {
    89  		if b != 0 {
    90  			nonZeroBytes = append(nonZeroBytes, b)
    91  			nonZeroBitset[i/8] |= 1 << byte(7-i%8)
    92  		}
    93  	}
    94  	if len(nonZeroBytes) == 0 {
    95  		return nil
    96  	}
    97  	return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...)
    98  }
    99  
   100  // DecompressBytes decompresses data with a known target size. If the input data
   101  // matches the size of the target, it means no compression was done in the first
   102  // place.
   103  func DecompressBytes(data []byte, target int) ([]byte, error) {
   104  	if len(data) > target {
   105  		return nil, errExceededTarget
   106  	}
   107  	if len(data) == target {
   108  		cpy := make([]byte, len(data))
   109  		copy(cpy, data)
   110  		return cpy, nil
   111  	}
   112  	return bitsetDecodeBytes(data, target)
   113  }
   114  
   115  // bitsetDecodeBytes decompresses data with a known target size.
   116  func bitsetDecodeBytes(data []byte, target int) ([]byte, error) {
   117  	out, size, err := bitsetDecodePartialBytes(data, target)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  	if size != len(data) {
   122  		return nil, errUnreferencedData
   123  	}
   124  	return out, nil
   125  }
   126  
   127  // bitsetDecodePartialBytes decompresses data with a known target size, but does
   128  // not enforce consuming all the input bytes. In addition to the decompressed
   129  // output, the function returns the length of compressed input data corresponding
   130  // to the output as the input slice may be longer.
   131  func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) {
   132  	// Sanity check 0 targets to avoid infinite recursion
   133  	if target == 0 {
   134  		return nil, 0, nil
   135  	}
   136  	// Handle the zero and single byte corner cases
   137  	decomp := make([]byte, target)
   138  	if len(data) == 0 {
   139  		return decomp, 0, nil
   140  	}
   141  	if target == 1 {
   142  		decomp[0] = data[0] // copy to avoid referencing the input slice
   143  		if data[0] != 0 {
   144  			return decomp, 1, nil
   145  		}
   146  		return decomp, 0, nil
   147  	}
   148  	// Decompress the bitset of set bytes and distribute the non zero bytes
   149  	nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8)
   150  	if err != nil {
   151  		return nil, ptr, err
   152  	}
   153  	for i := 0; i < 8*len(nonZeroBitset); i++ {
   154  		if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 {
   155  			// Make sure we have enough data to push into the correct slot
   156  			if ptr >= len(data) {
   157  				return nil, 0, errMissingData
   158  			}
   159  			if i >= len(decomp) {
   160  				return nil, 0, errExceededTarget
   161  			}
   162  			// Make sure the data is valid and push into the slot
   163  			if data[ptr] == 0 {
   164  				return nil, 0, errZeroContent
   165  			}
   166  			decomp[i] = data[ptr]
   167  			ptr++
   168  		}
   169  	}
   170  	return decomp, ptr, nil
   171  }