github.com/bir3/gocompiler@v0.9.2202/extra/compress/huff0/huff0.go (about) 1 // Package huff0 provides fast huffman encoding as used in zstd. 2 // 3 // See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details. 4 package huff0 5 6 import ( 7 "errors" 8 "fmt" 9 "math" 10 "math/bits" 11 "sync" 12 13 "github.com/bir3/gocompiler/extra/compress/fse" 14 ) 15 16 const ( 17 maxSymbolValue = 255 18 19 // zstandard limits tablelog to 11, see: 20 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description 21 tableLogMax = 11 22 tableLogDefault = 11 23 minTablelog = 5 24 huffNodesLen = 512 25 26 // BlockSizeMax is maximum input size for a single block uncompressed. 27 BlockSizeMax = 1<<18 - 1 28 ) 29 30 var ( 31 // ErrIncompressible is returned when input is judged to be too hard to compress. 32 ErrIncompressible = errors.New("input is not compressible") 33 34 // ErrUseRLE is returned from the compressor when the input is a single byte value repeated. 35 ErrUseRLE = errors.New("input is single value repeated") 36 37 // ErrTooBig is return if input is too large for a single block. 38 ErrTooBig = errors.New("input too big") 39 40 // ErrMaxDecodedSizeExceeded is return if input is too large for a single block. 41 ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded") 42 ) 43 44 type ReusePolicy uint8 45 46 const ( 47 // ReusePolicyAllow will allow reuse if it produces smaller output. 48 ReusePolicyAllow ReusePolicy = iota 49 50 // ReusePolicyPrefer will re-use aggressively if possible. 51 // This will not check if a new table will produce smaller output, 52 // except if the current table is impossible to use or 53 // compressed output is bigger than input. 54 ReusePolicyPrefer 55 56 // ReusePolicyNone will disable re-use of tables. 57 // This is slightly faster than ReusePolicyAllow but may produce larger output. 58 ReusePolicyNone 59 60 // ReusePolicyMust must allow reuse and produce smaller output. 61 ReusePolicyMust 62 ) 63 64 type Scratch struct { 65 count [maxSymbolValue + 1]uint32 66 67 // Per block parameters. 68 // These can be used to override compression parameters of the block. 69 // Do not touch, unless you know what you are doing. 70 71 // Out is output buffer. 72 // If the scratch is re-used before the caller is done processing the output, 73 // set this field to nil. 74 // Otherwise the output buffer will be re-used for next Compression/Decompression step 75 // and allocation will be avoided. 76 Out []byte 77 78 // OutTable will contain the table data only, if a new table has been generated. 79 // Slice of the returned data. 80 OutTable []byte 81 82 // OutData will contain the compressed data. 83 // Slice of the returned data. 84 OutData []byte 85 86 // MaxDecodedSize will set the maximum allowed output size. 87 // This value will automatically be set to BlockSizeMax if not set. 88 // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. 89 MaxDecodedSize int 90 91 br byteReader 92 93 // MaxSymbolValue will override the maximum symbol value of the next block. 94 MaxSymbolValue uint8 95 96 // TableLog will attempt to override the tablelog for the next block. 97 // Must be <= 11 and >= 5. 98 TableLog uint8 99 100 // Reuse will specify the reuse policy 101 Reuse ReusePolicy 102 103 // WantLogLess allows to specify a log 2 reduction that should at least be achieved, 104 // otherwise the block will be returned as incompressible. 105 // The reduction should then at least be (input size >> WantLogLess) 106 // If WantLogLess == 0 any improvement will do. 107 WantLogLess uint8 108 109 symbolLen uint16 // Length of active part of the symbol table. 110 maxCount int // count of the most probable symbol 111 clearCount bool // clear count 112 actualTableLog uint8 // Selected tablelog. 113 prevTableLog uint8 // Tablelog for previous table 114 prevTable cTable // Table used for previous compression. 115 cTable cTable // compression table 116 dt dTable // decompression table 117 nodes []nodeElt 118 tmpOut [4][]byte 119 fse *fse.Scratch 120 decPool sync.Pool // *[4][256]byte buffers. 121 huffWeight [maxSymbolValue + 1]byte 122 } 123 124 // TransferCTable will transfer the previously used compression table. 125 func (s *Scratch) TransferCTable(src *Scratch) { 126 if cap(s.prevTable) < len(src.prevTable) { 127 s.prevTable = make(cTable, 0, maxSymbolValue+1) 128 } 129 s.prevTable = s.prevTable[:len(src.prevTable)] 130 copy(s.prevTable, src.prevTable) 131 s.prevTableLog = src.prevTableLog 132 } 133 134 func (s *Scratch) prepare(in []byte) (*Scratch, error) { 135 if len(in) > BlockSizeMax { 136 return nil, ErrTooBig 137 } 138 if s == nil { 139 s = &Scratch{} 140 } 141 if s.MaxSymbolValue == 0 { 142 s.MaxSymbolValue = maxSymbolValue 143 } 144 if s.TableLog == 0 { 145 s.TableLog = tableLogDefault 146 } 147 if s.TableLog > tableLogMax || s.TableLog < minTablelog { 148 return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax) 149 } 150 if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax { 151 s.MaxDecodedSize = BlockSizeMax 152 } 153 if s.clearCount && s.maxCount == 0 { 154 for i := range s.count { 155 s.count[i] = 0 156 } 157 s.clearCount = false 158 } 159 if cap(s.Out) == 0 { 160 s.Out = make([]byte, 0, len(in)) 161 } 162 s.Out = s.Out[:0] 163 164 s.OutTable = nil 165 s.OutData = nil 166 if cap(s.nodes) < huffNodesLen+1 { 167 s.nodes = make([]nodeElt, 0, huffNodesLen+1) 168 } 169 s.nodes = s.nodes[:0] 170 if s.fse == nil { 171 s.fse = &fse.Scratch{} 172 } 173 s.br.init(in) 174 175 return s, nil 176 } 177 178 type cTable []cTableEntry 179 180 func (c cTable) write(s *Scratch) error { 181 var ( 182 // precomputed conversion table 183 bitsToWeight [tableLogMax + 1]byte 184 huffLog = s.actualTableLog 185 // last weight is not saved. 186 maxSymbolValue = uint8(s.symbolLen - 1) 187 huffWeight = s.huffWeight[:256] 188 ) 189 const ( 190 maxFSETableLog = 6 191 ) 192 // convert to weight 193 bitsToWeight[0] = 0 194 for n := uint8(1); n < huffLog+1; n++ { 195 bitsToWeight[n] = huffLog + 1 - n 196 } 197 198 // Acquire histogram for FSE. 199 hist := s.fse.Histogram() 200 hist = hist[:256] 201 for i := range hist[:16] { 202 hist[i] = 0 203 } 204 for n := uint8(0); n < maxSymbolValue; n++ { 205 v := bitsToWeight[c[n].nBits] & 15 206 huffWeight[n] = v 207 hist[v]++ 208 } 209 210 // FSE compress if feasible. 211 if maxSymbolValue >= 2 { 212 huffMaxCnt := uint32(0) 213 huffMax := uint8(0) 214 for i, v := range hist[:16] { 215 if v == 0 { 216 continue 217 } 218 huffMax = byte(i) 219 if v > huffMaxCnt { 220 huffMaxCnt = v 221 } 222 } 223 s.fse.HistogramFinished(huffMax, int(huffMaxCnt)) 224 s.fse.TableLog = maxFSETableLog 225 b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse) 226 if err == nil && len(b) < int(s.symbolLen>>1) { 227 s.Out = append(s.Out, uint8(len(b))) 228 s.Out = append(s.Out, b...) 229 return nil 230 } 231 // Unable to compress (RLE/uncompressible) 232 } 233 // write raw values as 4-bits (max : 15) 234 if maxSymbolValue > (256 - 128) { 235 // should not happen : likely means source cannot be compressed 236 return ErrIncompressible 237 } 238 op := s.Out 239 // special case, pack weights 4 bits/weight. 240 op = append(op, 128|(maxSymbolValue-1)) 241 // be sure it doesn't cause msan issue in final combination 242 huffWeight[maxSymbolValue] = 0 243 for n := uint16(0); n < uint16(maxSymbolValue); n += 2 { 244 op = append(op, (huffWeight[n]<<4)|huffWeight[n+1]) 245 } 246 s.Out = op 247 return nil 248 } 249 250 func (c cTable) estTableSize(s *Scratch) (sz int, err error) { 251 var ( 252 // precomputed conversion table 253 bitsToWeight [tableLogMax + 1]byte 254 huffLog = s.actualTableLog 255 // last weight is not saved. 256 maxSymbolValue = uint8(s.symbolLen - 1) 257 huffWeight = s.huffWeight[:256] 258 ) 259 const ( 260 maxFSETableLog = 6 261 ) 262 // convert to weight 263 bitsToWeight[0] = 0 264 for n := uint8(1); n < huffLog+1; n++ { 265 bitsToWeight[n] = huffLog + 1 - n 266 } 267 268 // Acquire histogram for FSE. 269 hist := s.fse.Histogram() 270 hist = hist[:256] 271 for i := range hist[:16] { 272 hist[i] = 0 273 } 274 for n := uint8(0); n < maxSymbolValue; n++ { 275 v := bitsToWeight[c[n].nBits] & 15 276 huffWeight[n] = v 277 hist[v]++ 278 } 279 280 // FSE compress if feasible. 281 if maxSymbolValue >= 2 { 282 huffMaxCnt := uint32(0) 283 huffMax := uint8(0) 284 for i, v := range hist[:16] { 285 if v == 0 { 286 continue 287 } 288 huffMax = byte(i) 289 if v > huffMaxCnt { 290 huffMaxCnt = v 291 } 292 } 293 s.fse.HistogramFinished(huffMax, int(huffMaxCnt)) 294 s.fse.TableLog = maxFSETableLog 295 b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse) 296 if err == nil && len(b) < int(s.symbolLen>>1) { 297 sz += 1 + len(b) 298 return sz, nil 299 } 300 // Unable to compress (RLE/uncompressible) 301 } 302 // write raw values as 4-bits (max : 15) 303 if maxSymbolValue > (256 - 128) { 304 // should not happen : likely means source cannot be compressed 305 return 0, ErrIncompressible 306 } 307 // special case, pack weights 4 bits/weight. 308 sz += 1 + int(maxSymbolValue/2) 309 return sz, nil 310 } 311 312 // estimateSize returns the estimated size in bytes of the input represented in the 313 // histogram supplied. 314 func (c cTable) estimateSize(hist []uint32) int { 315 nbBits := uint32(7) 316 for i, v := range c[:len(hist)] { 317 nbBits += uint32(v.nBits) * hist[i] 318 } 319 return int(nbBits >> 3) 320 } 321 322 // minSize returns the minimum possible size considering the shannon limit. 323 func (s *Scratch) minSize(total int) int { 324 nbBits := float64(7) 325 fTotal := float64(total) 326 for _, v := range s.count[:s.symbolLen] { 327 n := float64(v) 328 if n > 0 { 329 nbBits += math.Log2(fTotal/n) * n 330 } 331 } 332 return int(nbBits) >> 3 333 } 334 335 func highBit32(val uint32) (n uint32) { 336 return uint32(bits.Len32(val) - 1) 337 }