github.com/klaytn/klaytn@v1.12.1/common/bitutil/compress.go (about) 1 // Modifications Copyright 2018 The klaytn Authors 2 // Copyright 2017 The go-ethereum Authors 3 // This file is part of the go-ethereum library. 4 // 5 // The go-ethereum library is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // The go-ethereum library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 17 // 18 // This file is derived from common/bitutil/compress.go (2018/06/04). 19 // Modified and improved for the klaytn development. 20 21 package bitutil 22 23 import "errors" 24 25 var ( 26 // errMissingData is returned from decompression if the byte referenced by 27 // the bitset header overflows the input data. 28 errMissingData = errors.New("missing bytes on input") 29 30 // errUnreferencedData is returned from decompression if not all bytes were used 31 // up from the input data after decompressing it. 32 errUnreferencedData = errors.New("extra bytes on input") 33 34 // errExceededTarget is returned from decompression if the bitset header has 35 // more bits defined than the number of target buffer space available. 36 errExceededTarget = errors.New("target data size exceeded") 37 38 // errZeroContent is returned from decompression if a data byte referenced in 39 // the bitset header is actually a zero byte. 40 errZeroContent = errors.New("zero byte in input content") 41 ) 42 43 // The compression algorithm implemented by CompressBytes and DecompressBytes is 44 // optimized for sparse input data which contains a lot of zero bytes. Decompression 45 // requires knowledge of the decompressed data length. 46 // 47 // Compression works as follows: 48 // 49 // if data only contains zeroes, 50 // CompressBytes(data) == nil 51 // otherwise if len(data) <= 1, 52 // CompressBytes(data) == data 53 // otherwise: 54 // CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...) 55 // where 56 // nonZeroBitset(data) is a bit vector with len(data) bits (MSB first): 57 // nonZeroBitset(data)[i/8] && (1 << (7-i%8)) != 0 if data[i] != 0 58 // len(nonZeroBitset(data)) == (len(data)+7)/8 59 // nonZeroBytes(data) contains the non-zero bytes of data in the same order 60 61 // CompressBytes compresses the input byte slice according to the sparse bitset 62 // representation algorithm. If the result is bigger than the original input, no 63 // compression is done. 64 func CompressBytes(data []byte) []byte { 65 if out := bitsetEncodeBytes(data); len(out) < len(data) { 66 return out 67 } 68 cpy := make([]byte, len(data)) 69 copy(cpy, data) 70 return cpy 71 } 72 73 // bitsetEncodeBytes compresses the input byte slice according to the sparse 74 // bitset representation algorithm. 75 func bitsetEncodeBytes(data []byte) []byte { 76 // Empty slices get compressed to nil 77 if len(data) == 0 { 78 return nil 79 } 80 // One byte slices compress to nil or retain the single byte 81 if len(data) == 1 { 82 if data[0] == 0 { 83 return nil 84 } 85 return data 86 } 87 // Calculate the bitset of set bytes, and gather the non-zero bytes 88 nonZeroBitset := make([]byte, (len(data)+7)/8) 89 nonZeroBytes := make([]byte, 0, len(data)) 90 91 for i, b := range data { 92 if b != 0 { 93 nonZeroBytes = append(nonZeroBytes, b) 94 nonZeroBitset[i/8] |= 1 << byte(7-i%8) 95 } 96 } 97 if len(nonZeroBytes) == 0 { 98 return nil 99 } 100 return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...) 101 } 102 103 // DecompressBytes decompresses data with a known target size. If the input data 104 // matches the size of the target, it means no compression was done in the first 105 // place. 106 func DecompressBytes(data []byte, target int) ([]byte, error) { 107 if len(data) > target { 108 return nil, errExceededTarget 109 } 110 if len(data) == target { 111 cpy := make([]byte, len(data)) 112 copy(cpy, data) 113 return cpy, nil 114 } 115 return bitsetDecodeBytes(data, target) 116 } 117 118 // bitsetDecodeBytes decompresses data with a known target size. 119 func bitsetDecodeBytes(data []byte, target int) ([]byte, error) { 120 out, size, err := bitsetDecodePartialBytes(data, target) 121 if err != nil { 122 return nil, err 123 } 124 if size != len(data) { 125 return nil, errUnreferencedData 126 } 127 return out, nil 128 } 129 130 // bitsetDecodePartialBytes decompresses data with a known target size, but does 131 // not enforce consuming all the input bytes. In addition to the decompressed 132 // output, the function returns the length of compressed input data corresponding 133 // to the output as the input slice may be longer. 134 func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) { 135 // Sanity check 0 targets to avoid infinite recursion 136 if target == 0 { 137 return nil, 0, nil 138 } 139 // Handle the zero and single byte corner cases 140 decomp := make([]byte, target) 141 if len(data) == 0 { 142 return decomp, 0, nil 143 } 144 if target == 1 { 145 decomp[0] = data[0] // copy to avoid referencing the input slice 146 if data[0] != 0 { 147 return decomp, 1, nil 148 } 149 return decomp, 0, nil 150 } 151 // Decompress the bitset of set bytes and distribute the non zero bytes 152 nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8) 153 if err != nil { 154 return nil, ptr, err 155 } 156 for i := 0; i < 8*len(nonZeroBitset); i++ { 157 if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 { 158 // Make sure we have enough data to push into the correct slot 159 if ptr >= len(data) { 160 return nil, 0, errMissingData 161 } 162 if i >= len(decomp) { 163 return nil, 0, errExceededTarget 164 } 165 // Make sure the data is valid and push into the slot 166 if data[ptr] == 0 { 167 return nil, 0, errZeroContent 168 } 169 decomp[i] = data[ptr] 170 ptr++ 171 } 172 } 173 return decomp, ptr, nil 174 }