// Source: github.com/arieschain/arieschain@v0.0.0-20191023063405-37c074544356/common/bitutil/compress.go

package bitutil

import "errors"

var (
	// errMissingData is returned from decompression if the byte referenced by
	// the bitset header overflows the input data.
	errMissingData = errors.New("missing bytes on input")

	// errUnreferencedData is returned from decompression if not all bytes were used
	// up from the input data after decompressing it.
	errUnreferencedData = errors.New("extra bytes on input")

	// errExceededTarget is returned from decompression if the bitset header has
	// more bits defined than the number of target buffer space available. It is
	// also returned by DecompressBytes when the input is longer than the target.
	errExceededTarget = errors.New("target data size exceeded")

	// errZeroContent is returned from decompression if a data byte referenced in
	// the bitset header is actually a zero byte.
	errZeroContent = errors.New("zero byte in input content")
)

// The compression algorithm implemented by CompressBytes and DecompressBytes is
// optimized for sparse input data which contains a lot of zero bytes. Decompression
// requires knowledge of the decompressed data length.
//
// Compression works as follows:
//
//   if data only contains zeroes,
//       CompressBytes(data) == nil
//   otherwise if len(data) <= 1,
//       CompressBytes(data) == data
//   otherwise:
//       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
//       where
//         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
//             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
//             len(nonZeroBitset(data)) == (len(data)+7)/8
//         nonZeroBytes(data) contains the non-zero bytes of data in the same order
//
// The recursive scheme above is what bitsetEncodeBytes computes; CompressBytes
// returns a copy of data instead whenever that encoding is not shorter than data.

// CompressBytes compresses the input byte slice according to the sparse bitset
// representation algorithm. If the encoded form is not strictly shorter than the
// original input, no compression is done and a copy of the input is returned.
44 func CompressBytes(data []byte) []byte { 45 if out := bitsetEncodeBytes(data); len(out) < len(data) { 46 return out 47 } 48 cpy := make([]byte, len(data)) 49 copy(cpy, data) 50 return cpy 51 } 52 53 // bitsetEncodeBytes compresses the input byte slice according to the sparse 54 // bitset representation algorithm. 55 func bitsetEncodeBytes(data []byte) []byte { 56 // Empty slices get compressed to nil 57 if len(data) == 0 { 58 return nil 59 } 60 // One byte slices compress to nil or retain the single byte 61 if len(data) == 1 { 62 if data[0] == 0 { 63 return nil 64 } 65 return data 66 } 67 // Calculate the bitset of set bytes, and gather the non-zero bytes 68 nonZeroBitset := make([]byte, (len(data)+7)/8) 69 nonZeroBytes := make([]byte, 0, len(data)) 70 71 for i, b := range data { 72 if b != 0 { 73 nonZeroBytes = append(nonZeroBytes, b) 74 nonZeroBitset[i/8] |= 1 << byte(7-i%8) 75 } 76 } 77 if len(nonZeroBytes) == 0 { 78 return nil 79 } 80 return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...) 81 } 82 83 // DecompressBytes decompresses data with a known target size. If the input data 84 // matches the size of the target, it means no compression was done in the first 85 // place. 86 func DecompressBytes(data []byte, target int) ([]byte, error) { 87 if len(data) > target { 88 return nil, errExceededTarget 89 } 90 if len(data) == target { 91 cpy := make([]byte, len(data)) 92 copy(cpy, data) 93 return cpy, nil 94 } 95 return bitsetDecodeBytes(data, target) 96 } 97 98 // bitsetDecodeBytes decompresses data with a known target size. 99 func bitsetDecodeBytes(data []byte, target int) ([]byte, error) { 100 out, size, err := bitsetDecodePartialBytes(data, target) 101 if err != nil { 102 return nil, err 103 } 104 if size != len(data) { 105 return nil, errUnreferencedData 106 } 107 return out, nil 108 } 109 110 // bitsetDecodePartialBytes decompresses data with a known target size, but does 111 // not enforce consuming all the input bytes. 
In addition to the decompressed 112 // output, the function returns the length of compressed input data corresponding 113 // to the output as the input slice may be longer. 114 func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) { 115 // Sanity check 0 targets to avoid infinite recursion 116 if target == 0 { 117 return nil, 0, nil 118 } 119 // Handle the zero and single byte corner cases 120 decomp := make([]byte, target) 121 if len(data) == 0 { 122 return decomp, 0, nil 123 } 124 if target == 1 { 125 decomp[0] = data[0] // copy to avoid referencing the input slice 126 if data[0] != 0 { 127 return decomp, 1, nil 128 } 129 return decomp, 0, nil 130 } 131 // Decompress the bitset of set bytes and distribute the non zero bytes 132 nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8) 133 if err != nil { 134 return nil, ptr, err 135 } 136 for i := 0; i < 8*len(nonZeroBitset); i++ { 137 if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 { 138 // Make sure we have enough data to push into the correct slot 139 if ptr >= len(data) { 140 return nil, 0, errMissingData 141 } 142 if i >= len(decomp) { 143 return nil, 0, errExceededTarget 144 } 145 // Make sure the data is valid and push into the slot 146 if data[ptr] == 0 { 147 return nil, 0, errZeroContent 148 } 149 decomp[i] = data[ptr] 150 ptr++ 151 } 152 } 153 return decomp, ptr, nil 154 }