github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/compress/flate/huffman_code.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package flate 6 7 import ( 8 "math" 9 "sort" 10 ) 11 12 type hcode uint32 13 14 type huffmanEncoder struct { 15 codes []hcode 16 freqcache []literalNode 17 bitCount [17]int32 18 lns literalNodeSorter 19 lfs literalFreqSorter 20 } 21 22 type literalNode struct { 23 literal uint16 24 freq int32 25 } 26 27 // A levelInfo describes the state of the constructed tree for a given depth. 28 type levelInfo struct { 29 // Our level. for better printing 30 level int32 31 32 // The frequency of the last node at this level 33 lastFreq int32 34 35 // The frequency of the next character to add to this level 36 nextCharFreq int32 37 38 // The frequency of the next pair (from level below) to add to this level. 39 // Only valid if the "needed" value of the next lower level is 0. 40 nextPairFreq int32 41 42 // The number of chains remaining to generate for this level before moving 43 // up to the next level 44 needed int32 45 } 46 47 func (h hcode) codeBits() (code uint16, bits uint8) { 48 return uint16(h), uint8(h >> 16) 49 } 50 51 func (h *hcode) set(code uint16, bits uint8) { 52 *h = hcode(code) | hcode(uint32(bits)<<16) 53 } 54 55 func (h *hcode) setBits(bits uint8) { 56 *h = hcode(*h&0xffff) | hcode(uint32(bits)<<16) 57 } 58 59 func toCode(code uint16, bits uint8) hcode { 60 return hcode(code) | hcode(uint32(bits)<<16) 61 } 62 63 func (h hcode) code() (code uint16) { 64 return uint16(h) 65 } 66 67 func (h hcode) bits() (bits uint) { 68 return uint(h >> 16) 69 } 70 71 func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } 72 73 func newHuffmanEncoder(size int) *huffmanEncoder { 74 return &huffmanEncoder{codes: make([]hcode, size), freqcache: nil} 75 } 76 77 // Generates a HuffmanCode corresponding to the fixed literal table 78 func generateFixedLiteralEncoding() *huffmanEncoder { 79 h := newHuffmanEncoder(maxNumLit) 80 codes := h.codes 81 var ch uint16 82 for ch = 0; ch < maxNumLit; ch++ { 83 var bits uint16 84 var size uint8 85 switch { 86 case ch < 144: 87 // size 8, 000110000 .. 10111111 88 bits = ch + 48 89 size = 8 90 break 91 case ch < 256: 92 // size 9, 110010000 .. 111111111 93 bits = ch + 400 - 144 94 size = 9 95 break 96 case ch < 280: 97 // size 7, 0000000 .. 0010111 98 bits = ch - 256 99 size = 7 100 break 101 default: 102 // size 8, 11000000 .. 11000111 103 bits = ch + 192 - 280 104 size = 8 105 } 106 codes[ch] = toCode(reverseBits(bits, size), size) 107 } 108 return h 109 } 110 111 func generateFixedOffsetEncoding() *huffmanEncoder { 112 h := newHuffmanEncoder(30) 113 codes := h.codes 114 for ch := uint16(0); ch < 30; ch++ { 115 codes[ch] = toCode(reverseBits(ch, 5), 5) 116 } 117 return h 118 } 119 120 var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() 121 var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() 122 123 func (h *huffmanEncoder) bitLength(freq []int32) int64 { 124 var total int64 125 for i, f := range freq { 126 if f != 0 { 127 total += int64(f) * int64(h.codes[i].bits()) 128 } 129 } 130 return total 131 } 132 133 const maxBitsLimit = 16 134 135 // Return the number of literals assigned to each bit size in the Huffman encoding 136 // 137 // This method is only called when list.length >= 3 138 // The cases of 0, 1, and 2 literals are handled by special case code. 139 // 140 // list An array of the literals with non-zero frequencies 141 // and their associated frequencies. The array is in order of increasing 142 // frequency, and has as its last element a special element with frequency 143 // MaxInt32 144 // maxBits The maximum number of bits that should be used to encode any literal. 145 // Must be less than 16. 146 // return An integer array in which array[i] indicates the number of literals 147 // that should be encoded in i bits. 148 func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { 149 if maxBits >= maxBitsLimit { 150 panic("flate: maxBits too large") 151 } 152 n := int32(len(list)) 153 list = list[0 : n+1] 154 list[n] = maxNode() 155 156 // The tree can't have greater depth than n - 1, no matter what. This 157 // saves a little bit of work in some small cases 158 if maxBits > n-1 { 159 maxBits = n - 1 160 } 161 162 // Create information about each of the levels. 163 // A bogus "Level 0" whose sole purpose is so that 164 // level1.prev.needed==0. This makes level1.nextPairFreq 165 // be a legitimate value that never gets chosen. 166 var levels [maxBitsLimit]levelInfo 167 // leafCounts[i] counts the number of literals at the left 168 // of ancestors of the rightmost node at level i. 169 // leafCounts[i][j] is the number of literals at the left 170 // of the level j ancestor. 171 var leafCounts [maxBitsLimit][maxBitsLimit]int32 172 173 for level := int32(1); level <= maxBits; level++ { 174 // For every level, the first two items are the first two characters. 175 // We initialize the levels as if we had already figured this out. 176 levels[level] = levelInfo{ 177 level: level, 178 lastFreq: list[1].freq, 179 nextCharFreq: list[2].freq, 180 nextPairFreq: list[0].freq + list[1].freq, 181 } 182 leafCounts[level][level] = 2 183 if level == 1 { 184 levels[level].nextPairFreq = math.MaxInt32 185 } 186 } 187 188 // We need a total of 2*n - 2 items at top level and have already generated 2. 189 levels[maxBits].needed = 2*n - 4 190 191 level := maxBits 192 for { 193 l := &levels[level] 194 if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { 195 // We've run out of both leafs and pairs. 196 // End all calculations for this level. 197 // To make sure we never come back to this level or any lower level, 198 // set nextPairFreq impossibly large. 199 l.needed = 0 200 levels[level+1].nextPairFreq = math.MaxInt32 201 level++ 202 continue 203 } 204 205 prevFreq := l.lastFreq 206 if l.nextCharFreq < l.nextPairFreq { 207 // The next item on this row is a leaf node. 208 n := leafCounts[level][level] + 1 209 l.lastFreq = l.nextCharFreq 210 // Lower leafCounts are the same of the previous node. 211 leafCounts[level][level] = n 212 l.nextCharFreq = list[n].freq 213 } else { 214 // The next item on this row is a pair from the previous row. 215 // nextPairFreq isn't valid until we generate two 216 // more values in the level below 217 l.lastFreq = l.nextPairFreq 218 // Take leaf counts from the lower level, except counts[level] remains the same. 219 copy(leafCounts[level][:level], leafCounts[level-1][:level]) 220 levels[l.level-1].needed = 2 221 } 222 223 if l.needed--; l.needed == 0 { 224 // We've done everything we need to do for this level. 225 // Continue calculating one level up. Fill in nextPairFreq 226 // of that level with the sum of the two nodes we've just calculated on 227 // this level. 228 if l.level == maxBits { 229 // All done! 230 break 231 } 232 levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq 233 level++ 234 } else { 235 // If we stole from below, move down temporarily to replenish it. 236 for levels[level-1].needed > 0 { 237 level-- 238 } 239 } 240 } 241 242 // Somethings is wrong if at the end, the top level is null or hasn't used 243 // all of the leaves. 244 if leafCounts[maxBits][maxBits] != n { 245 panic("leafCounts[maxBits][maxBits] != n") 246 } 247 248 bitCount := h.bitCount[:maxBits+1] 249 //make([]int32, maxBits+1) 250 bits := 1 251 counts := &leafCounts[maxBits] 252 for level := maxBits; level > 0; level-- { 253 // chain.leafCount gives the number of literals requiring at least "bits" 254 // bits to encode. 255 bitCount[bits] = counts[level] - counts[level-1] 256 bits++ 257 } 258 return bitCount 259 } 260 261 // Look at the leaves and assign them a bit count and an encoding as specified 262 // in RFC 1951 3.2.2 263 func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { 264 code := uint16(0) 265 for n, bits := range bitCount { 266 code <<= 1 267 if n == 0 || bits == 0 { 268 continue 269 } 270 // The literals list[len(list)-bits] .. list[len(list)-bits] 271 // are encoded using "bits" bits, and get the values 272 // code, code + 1, .... The code values are 273 // assigned in literal order (not frequency order). 274 chunk := list[len(list)-int(bits):] 275 276 h.lns.Sort(chunk) 277 for _, node := range chunk { 278 h.codes[node.literal] = toCode(reverseBits(code, uint8(n)), uint8(n)) 279 code++ 280 } 281 list = list[0 : len(list)-int(bits)] 282 } 283 } 284 285 // Update this Huffman Code object to be the minimum code for the specified frequency count. 286 // 287 // freq An array of frequencies, in which frequency[i] gives the frequency of literal i. 288 // maxBits The maximum number of bits to use for any literal. 289 func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { 290 if h.freqcache == nil { 291 h.freqcache = make([]literalNode, 300) 292 } 293 list := h.freqcache[:len(freq)+1] 294 // Number of non-zero literals 295 count := 0 296 // Set list to be the set of all non-zero literals and their frequencies 297 for i, f := range freq { 298 if f != 0 { 299 list[count] = literalNode{uint16(i), f} 300 count++ 301 } else { 302 list[count] = literalNode{} 303 //h.codeBits[i] = 0 304 h.codes[i].setBits(0) 305 } 306 } 307 list[len(freq)] = literalNode{} 308 // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros 309 // FIXME: Doesn't do what it says on the tin (klauspost) 310 //h.codeBits = h.codeBits[0:len(freq)] 311 312 list = list[0:count] 313 if count <= 2 { 314 // Handle the small cases here, because they are awkward for the general case code. With 315 // two or fewer literals, everything has bit length 1. 316 for i, node := range list { 317 // "list" is in order of increasing literal value. 318 h.codes[node.literal].set(uint16(i), 1) 319 //h.codeBits[node.literal] = 1 320 //h.code[node.literal] = uint16(i) 321 } 322 return 323 } 324 h.lfs.Sort(list) 325 326 // Get the number of literals for each bit count 327 bitCount := h.bitCounts(list, maxBits) 328 // And do the assignment 329 h.assignEncodingAndSize(bitCount, list) 330 } 331 332 type literalNodeSorter []literalNode 333 334 func (s *literalNodeSorter) Sort(a []literalNode) { 335 *s = literalNodeSorter(a) 336 sort.Sort(s) 337 } 338 339 func (s literalNodeSorter) Len() int { return len(s) } 340 341 func (s literalNodeSorter) Less(i, j int) bool { 342 return s[i].literal < s[j].literal 343 } 344 345 func (s literalNodeSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 346 347 type literalFreqSorter []literalNode 348 349 func (s *literalFreqSorter) Sort(a []literalNode) { 350 *s = literalFreqSorter(a) 351 sort.Sort(s) 352 } 353 354 func (s literalFreqSorter) Len() int { return len(s) } 355 356 func (s literalFreqSorter) Less(i, j int) bool { 357 if s[i].freq == s[j].freq { 358 return s[i].literal < s[j].literal 359 } 360 return s[i].freq < s[j].freq 361 } 362 363 func (s literalFreqSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }