// Source: github.com/aigarnetwork/aigar@v0.0.0-20191115204914-d59a6eb70f8e/common/bitutil/compress.go

// Copyright 2018 The go-ethereum Authors
// Copyright 2019 The go-aigar Authors
// This file is part of the go-aigar library.
//
// The go-aigar library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-aigar library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-aigar library. If not, see <http://www.gnu.org/licenses/>.

package bitutil

import "errors"

// Sentinel errors reported by the decompression routines in this file.
var (
	// errMissingData is returned from decompression if the byte referenced by
	// the bitset header overflows the input data, i.e. the header marks a
	// non-zero byte but the input has no bytes left to supply it.
	errMissingData = errors.New("missing bytes on input")

	// errUnreferencedData is returned from decompression if not all bytes were
	// used up from the input data after decompressing it.
	errUnreferencedData = errors.New("extra bytes on input")

	// errExceededTarget is returned from decompression if the bitset header has
	// more bits defined than the number of target buffer space available, or if
	// the input is longer than the requested target size.
	errExceededTarget = errors.New("target data size exceeded")

	// errZeroContent is returned from decompression if a data byte referenced in
	// the bitset header is actually a zero byte.
	errZeroContent = errors.New("zero byte in input content")
)

// The compression algorithm implemented by CompressBytes and DecompressBytes is
// optimized for sparse input data which contains a lot of zero bytes.
// Decompression requires knowledge of the decompressed data length.
//
// Compression works as follows:
//
//   if data only contains zeroes,
//       CompressBytes(data) == nil
//   otherwise if len(data) <= 1,
//       CompressBytes(data) == data
//   otherwise:
//       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
//       where
//         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
//             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
//             len(nonZeroBitset(data)) == (len(data)+7)/8
//         nonZeroBytes(data) contains the non-zero bytes of data in the same order

// CompressBytes encodes data with the sparse bitset scheme described above.
// The encoded form is returned only when it is strictly shorter than the
// input; otherwise the caller receives an independent copy of the input, so
// the result never aliases data in the uncompressed case.
func CompressBytes(data []byte) []byte {
	encoded := bitsetEncodeBytes(data)
	if len(encoded) >= len(data) {
		// No gain from encoding: hand back a private copy of the raw input.
		return append(make([]byte, 0, len(data)), data...)
	}
	return encoded
}

// bitsetEncodeBytes performs the recursive sparse bitset encoding of data
// without checking whether the output is actually smaller than the input.
// It returns nil for empty or all-zero input, and the input slice itself for
// a single non-zero byte.
func bitsetEncodeBytes(data []byte) []byte {
	switch len(data) {
	case 0:
		// Nothing to encode.
		return nil
	case 1:
		// A lone byte is either dropped (zero) or kept verbatim; this is the
		// base case that terminates the recursion on the bitset header.
		if data[0] == 0 {
			return nil
		}
		return data
	}
	var (
		bitset  = make([]byte, (len(data)+7)/8) // one flag per input byte, MSB first
		payload = make([]byte, 0, len(data))    // the non-zero bytes, in order
	)
	for idx := range data {
		if data[idx] == 0 {
			continue
		}
		bitset[idx>>3] |= 0x80 >> uint(idx&7)
		payload = append(payload, data[idx])
	}
	if len(payload) == 0 {
		// All-zero input collapses to nil.
		return nil
	}
	// The bitset header is itself sparse-encoded before the payload follows.
	return append(bitsetEncodeBytes(bitset), payload...)
}

// DecompressBytes decompresses data with a known target size.
If the input data 101 // matches the size of the target, it means no compression was done in the first 102 // place. 103 func DecompressBytes(data []byte, target int) ([]byte, error) { 104 if len(data) > target { 105 return nil, errExceededTarget 106 } 107 if len(data) == target { 108 cpy := make([]byte, len(data)) 109 copy(cpy, data) 110 return cpy, nil 111 } 112 return bitsetDecodeBytes(data, target) 113 } 114 115 // bitsetDecodeBytes decompresses data with a known target size. 116 func bitsetDecodeBytes(data []byte, target int) ([]byte, error) { 117 out, size, err := bitsetDecodePartialBytes(data, target) 118 if err != nil { 119 return nil, err 120 } 121 if size != len(data) { 122 return nil, errUnreferencedData 123 } 124 return out, nil 125 } 126 127 // bitsetDecodePartialBytes decompresses data with a known target size, but does 128 // not enforce consuming all the input bytes. In addition to the decompressed 129 // output, the function returns the length of compressed input data corresponding 130 // to the output as the input slice may be longer. 
131 func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) { 132 // Sanity check 0 targets to avoid infinite recursion 133 if target == 0 { 134 return nil, 0, nil 135 } 136 // Handle the zero and single byte corner cases 137 decomp := make([]byte, target) 138 if len(data) == 0 { 139 return decomp, 0, nil 140 } 141 if target == 1 { 142 decomp[0] = data[0] // copy to avoid referencing the input slice 143 if data[0] != 0 { 144 return decomp, 1, nil 145 } 146 return decomp, 0, nil 147 } 148 // Decompress the bitset of set bytes and distribute the non zero bytes 149 nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8) 150 if err != nil { 151 return nil, ptr, err 152 } 153 for i := 0; i < 8*len(nonZeroBitset); i++ { 154 if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 { 155 // Make sure we have enough data to push into the correct slot 156 if ptr >= len(data) { 157 return nil, 0, errMissingData 158 } 159 if i >= len(decomp) { 160 return nil, 0, errExceededTarget 161 } 162 // Make sure the data is valid and push into the slot 163 if data[ptr] == 0 { 164 return nil, 0, errZeroContent 165 } 166 decomp[i] = data[ptr] 167 ptr++ 168 } 169 } 170 return decomp, ptr, nil 171 }