// Source listing: github.com/sberex/go-sberex@v1.8.2-0.20181113200658-ed96ac38f7d7/common/bitutil/compress.go

// This file is part of the go-sberex library. The go-sberex library is
// free software: you can redistribute it and/or modify it under the terms
// of the GNU Lesser General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// The go-sberex library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License <http://www.gnu.org/licenses/> for more details.

package bitutil

import "errors"

var (
	// errMissingData is returned from decompression if the byte referenced by
	// the bitset header overflows the input data, i.e. a set bit asks for a
	// data byte after the input has been exhausted.
	errMissingData = errors.New("missing bytes on input")

	// errUnreferencedData is returned from decompression if not all bytes were used
	// up from the input data after decompressing it.
	errUnreferencedData = errors.New("extra bytes on input")

	// errExceededTarget is returned from decompression if the input is longer
	// than the target size, or if the bitset header has a bit set beyond the
	// target buffer space available.
	errExceededTarget = errors.New("target data size exceeded")

	// errZeroContent is returned from decompression if a data byte referenced in
	// the bitset header is actually a zero byte.
	errZeroContent = errors.New("zero byte in input content")
)

// The compression algorithm implemented by CompressBytes and DecompressBytes is
// optimized for sparse input data which contains a lot of zero bytes. Decompression
// requires knowledge of the decompressed data length.
//
// Compression works as follows:
//
//   if data only contains zeroes,
//       CompressBytes(data) == nil
//   otherwise if len(data) <= 1,
//       CompressBytes(data) == data
//   otherwise:
//       CompressBytes(data) == append(CompressBytes(nonZeroBitset(data)), nonZeroBytes(data)...)
//       where
//         nonZeroBitset(data) is a bit vector with len(data) bits (MSB first):
//             nonZeroBitset(data)[i/8] & (1 << (7-i%8)) != 0  if data[i] != 0
//             len(nonZeroBitset(data)) == (len(data)+7)/8
//         nonZeroBytes(data) contains the non-zero bytes of data in the same order
//
// The recursion above is what bitsetEncodeBytes implements; CompressBytes
// additionally falls back to a plain copy of the input whenever the bitset
// encoding is not strictly shorter.

// CompressBytes compresses the input byte slice according to the sparse bitset
// representation algorithm. If the encoded form is not strictly shorter than
// the original input, no compression is done and a copy of the input is
// returned instead. The result never aliases data.
func CompressBytes(data []byte) []byte {
	if out := bitsetEncodeBytes(data); len(out) < len(data) {
		return out
	}
	// Encoding did not help: hand back an independent copy so callers may
	// mutate the result without touching their input.
	cpy := make([]byte, len(data))
	copy(cpy, data)
	return cpy
}

// bitsetEncodeBytes compresses the input byte slice according to the sparse
// bitset representation algorithm. It returns nil for empty or all-zero input.
// For a single non-zero byte it returns data itself (not a copy); all other
// non-nil results are freshly allocated.
func bitsetEncodeBytes(data []byte) []byte {
	// Empty slices get compressed to nil
	if len(data) == 0 {
		return nil
	}
	// One byte slices compress to nil or retain the single byte
	if len(data) == 1 {
		if data[0] == 0 {
			return nil
		}
		return data
	}
	// Calculate the bitset of set bytes, and gather the non-zero bytes
	nonZeroBitset := make([]byte, (len(data)+7)/8)
	nonZeroBytes := make([]byte, 0, len(data))

	for i, b := range data {
		if b != 0 {
			nonZeroBytes = append(nonZeroBytes, b)
			// Bits are stored MSB first: position i lands in bit 7-i%8 of byte i/8.
			nonZeroBitset[i/8] |= 1 << byte(7-i%8)
		}
	}
	if len(nonZeroBytes) == 0 {
		return nil
	}
	// The bitset itself is usually sparse too, so it is encoded recursively
	// and used as the header in front of the gathered non-zero bytes.
	return append(bitsetEncodeBytes(nonZeroBitset), nonZeroBytes...)
}

// DecompressBytes decompresses data with a known target size. If the input data
// matches the size of the target, it means no compression was done in the first
// place.
97 func DecompressBytes(data []byte, target int) ([]byte, error) { 98 if len(data) > target { 99 return nil, errExceededTarget 100 } 101 if len(data) == target { 102 cpy := make([]byte, len(data)) 103 copy(cpy, data) 104 return cpy, nil 105 } 106 return bitsetDecodeBytes(data, target) 107 } 108 109 // bitsetDecodeBytes decompresses data with a known target size. 110 func bitsetDecodeBytes(data []byte, target int) ([]byte, error) { 111 out, size, err := bitsetDecodePartialBytes(data, target) 112 if err != nil { 113 return nil, err 114 } 115 if size != len(data) { 116 return nil, errUnreferencedData 117 } 118 return out, nil 119 } 120 121 // bitsetDecodePartialBytes decompresses data with a known target size, but does 122 // not enforce consuming all the input bytes. In addition to the decompressed 123 // output, the function returns the length of compressed input data corresponding 124 // to the output as the input slice may be longer. 125 func bitsetDecodePartialBytes(data []byte, target int) ([]byte, int, error) { 126 // Sanity check 0 targets to avoid infinite recursion 127 if target == 0 { 128 return nil, 0, nil 129 } 130 // Handle the zero and single byte corner cases 131 decomp := make([]byte, target) 132 if len(data) == 0 { 133 return decomp, 0, nil 134 } 135 if target == 1 { 136 decomp[0] = data[0] // copy to avoid referencing the input slice 137 if data[0] != 0 { 138 return decomp, 1, nil 139 } 140 return decomp, 0, nil 141 } 142 // Decompress the bitset of set bytes and distribute the non zero bytes 143 nonZeroBitset, ptr, err := bitsetDecodePartialBytes(data, (target+7)/8) 144 if err != nil { 145 return nil, ptr, err 146 } 147 for i := 0; i < 8*len(nonZeroBitset); i++ { 148 if nonZeroBitset[i/8]&(1<<byte(7-i%8)) != 0 { 149 // Make sure we have enough data to push into the correct slot 150 if ptr >= len(data) { 151 return nil, 0, errMissingData 152 } 153 if i >= len(decomp) { 154 return nil, 0, errExceededTarget 155 } 156 // Make sure the data is valid and 
push into the slot 157 if data[ptr] == 0 { 158 return nil, 0, errZeroContent 159 } 160 decomp[i] = data[ptr] 161 ptr++ 162 } 163 } 164 return decomp, ptr, nil 165 }