github.com/core-coin/go-core/v2@v2.1.9/common/bitutil/compress.go (about) 1 // Copyright 2017 by the Authors 2 // This file is part of the go-core library. 3 // 4 // The go-core library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-core library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-core library. If not, see <http://www.gnu.org/licenses/>. 16 17 package bitutil 18 19 import "errors" 20 21 var ( 22 // errMissingData is returned from decompression if the byte referenced by 23 // the bitset header overflows the input data. 24 errMissingData = errors.New("missing bytes on input") 25 26 // errUnreferencedData is returned from decompression if not all bytes were used 27 // up from the input data after decompressing it. 28 errUnreferencedData = errors.New("extra bytes on input") 29 30 // errExceededTarget is returned from decompression if the bitset header has 31 // more bits defined than the number of target buffer space available. 32 errExceededTarget = errors.New("target data size exceeded") 33 34 // errZeroContent is returned from decompression if a data byte referenced in 35 // the bitset header is actually a zero byte. 36 errZeroContent = errors.New("zero byte in input content") 37 ) 38 39 // The compression algorithm implemented by CompressBytes and DecompressBytes is 40 // optimized for sparse input data which contains a lot of zero bytes. Decompression 41 // requires knowledge of the decompressed data length. 
//
// Compression works as follows:
//
//   if data only contains zeroes,
//       CompressBytes(data) == nil
//   otherwise if len(data) <= 1,
//       CompressBytes(data) == data
//   otherwise:
//       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
//       where
//         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
//             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
//             len(nonZeroBitset(data)) == (len(data)+7)/8
//         nonZeroBytes(data) contains the non-zero bytes of data in the same order

// CompressBytes encodes data with the sparse bitset scheme and returns the
// encoding only when it is strictly shorter than the input. When the encoding
// would not save any space, a freshly allocated copy of the input is returned
// instead.
func CompressBytes(data []byte) []byte {
	encoded := bitsetEncodeBytes(data)
	if len(encoded) >= len(data) {
		// Encoding did not pay off: hand back an independent copy of the input.
		plain := make([]byte, len(data))
		copy(plain, data)
		return plain
	}
	return encoded
}

// bitsetEncodeBytes produces the sparse bitset encoding of data: the
// (recursively encoded) bitmap of non-zero positions followed by the non-zero
// bytes themselves. All-zero and empty inputs encode to nil. A single non-zero
// byte is returned as the input slice itself, without copying.
func bitsetEncodeBytes(data []byte) []byte {
	switch len(data) {
	case 0:
		// Nothing to encode.
		return nil
	case 1:
		// A lone byte is either dropped (zero) or kept verbatim.
		if data[0] == 0 {
			return nil
		}
		return data
	}
	// One bit per input byte, most significant bit first, plus the payload of
	// non-zero bytes in their original order.
	var (
		mask    = make([]byte, (len(data)+7)/8)
		payload = make([]byte, 0, len(data))
	)
	for idx := range data {
		if data[idx] == 0 {
			continue
		}
		mask[idx>>3] |= byte(0x80) >> uint(idx&7)
		payload = append(payload, data[idx])
	}
	if len(payload) == 0 {
		return nil
	}
	// The bitmap is itself sparse-encoded before the payload is attached.
	header := bitsetEncodeBytes(mask)
	return append(header, payload...)
}

// DecompressBytes decompresses data with a known target size. If the input data
// matches the size of the target, it means no compression was done in the first
// place.
102 func DecompressBytes(data []byte, target int) ([]byte, error) { 103 if len(data) > target { 104 return nil, errExceededTarget 105 } 106 if len(data) == target { 107 cpy := make([]byte, len(data)) 108 copy(cpy, data) 109 return cpy, nil 110 } 111 return bitsetDecodeBytes(data, target) 112 } 113 114 // bitsetDecodeBytes decompresses data with a known target size. 115 func bitsetDecodeBytes(data []byte, target int) ([]byte, error) { 116 out, size, err := bitsetDecodePartialBytes(data, target) 117 if err != nil { 118 return nil, err 119 } 120 if size != len(data) { 121 return nil, errUnreferencedData 122 } 123 return out, nil 124 } 125 126 // bitsetDecodePartialBytes decompresses data with a known target size, but does 127 // not enforce consuming all the input bytes. In addition to the decompressed 128 // output, the function returns the length of compressed input data corresponding 129 // to the output as the input slice may be longer. 130 func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) { 131 // Sanity check 0 targets to avoid infinite recursion 132 if target == 0 { 133 return nil, 0, nil 134 } 135 // Handle the zero and single byte corner cases 136 decomp := make([]byte, target) 137 if len(data) == 0 { 138 return decomp, 0, nil 139 } 140 if target == 1 { 141 decomp[0] = data[0] // copy to avoid referencing the input slice 142 if data[0] != 0 { 143 return decomp, 1, nil 144 } 145 return decomp, 0, nil 146 } 147 // Decompress the bitset of set bytes and distribute the non zero bytes 148 nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8) 149 if err != nil { 150 return nil, ptr, err 151 } 152 for i := 0; i < 8*len(nonZeroBitset); i++ { 153 if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 { 154 // Make sure we have enough data to push into the correct slot 155 if ptr >= len(data) { 156 return nil, 0, errMissingData 157 } 158 if i >= len(decomp) { 159 return nil, 0, errExceededTarget 160 } 161 // Make sure the data is valid 
and push into the slot 162 if data[ptr] == 0 { 163 return nil, 0, errZeroContent 164 } 165 decomp[i] = data[ptr] 166 ptr++ 167 } 168 } 169 return decomp, ptr, nil 170 }